// Source: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that have an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without a captured statement (e.g. inlined
  /// regions that reuse the captures of the enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden for task regions (see CGOpenMPTaskOutlinedRegionInfo).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined parallel/task, inlined, target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every CR_OpenMP captured-statement info is a
  /// CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions ('parallel' and friends) outlined into a helper function
/// that receives the global thread id as an explicit parameter.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name to use for the outlined capture helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
/// Used for 'task' regions; additionally supports the switch-based resume
/// mechanism required for untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that implements untied task switching. For an untied
  /// task the body is split into "parts"; the part id is stored through
  /// PartIDVar and a switch at function entry jumps to the part to resume.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current part id.
    const VarDecl *PartIDVar;
    /// Extra code to emit at every switching point (e.g. task yield).
    const RegionCodeGenTy UntiedCodeGen;
    /// Entry switch dispatching on the loaded part id; cases are appended
    /// lazily as switching points are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // Load the current part id and create the dispatch switch; part 0
        // (first entry) continues into the '.untied.jmp.' block, while the
        // default ('.untied.done.') path returns from the task.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the id of the next part, run the per-switch-point codegen,
        // then leave the task body; the new switch case resumes right here
        // when the task is re-entered with that part id.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the task body was split into so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Delegates most queries to the enclosing (outer) region info,
/// if any, since an inlined region introduces no captures of its own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE(review): unlike the other delegating members, this one re-reads
    // the outer info through getOldCSI() rather than the cached member.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region helper.
  StringRef HelperName;
};
343 
/// Placeholder region codegen callback for CGOpenMPInnerExprInfo, whose body
/// must never be emitted; reaching it indicates a codegen logic error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization redirect.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // This type is never the target of a downcast; classof always fails.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
/// Installs a CGOpenMPInlinedRegionInfo on the CodeGenFunction for the
/// lifetime of the object and (optionally) suppresses inheritance of
/// lambda/block capture state, restoring everything in the destructor.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, swapped out when NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash lambda/block capture maps so the inlined region does not see
      // captures belonging to the enclosing function.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as the generic
  /// implicit-barrier flag, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
479 namespace {
// Allow the bitwise operators declared by LLVM_MARK_AS_BITMASK_ENUM to be
// found for the enums in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
497 
/// Device ids reserved by the OpenMP offloading interface.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
503 } // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values are ABI with the OpenMP
/// runtime and must not be changed.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit the initializer for a reduction private copy.
/// If \p DRD carries an explicit initializer, evaluate \p InitOp with the
/// call's first argument privatized to \p Private and its second argument to
/// \p Original. Otherwise, default-initialize \p Private from a private
/// global holding the null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the argument variables to the private/original addresses,
    // then evaluate the initializer call with the UDR's initializer
    // function (Reduction.second) mapped in for the callee.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a null constant of the element type
    // and copy it into the private location.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly via an lvalue mapping; no rvalue is
      // produced, so return early.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// Emits an element-by-element while-do loop over the destination array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via
///        emitInitWithReductionInitializer using \p DRD.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl, or null; when non-null \p SrcAddr is
///        walked in lockstep as the 'original' array.
/// \param SrcAddr Address of the original array (only used when \p DRD is
///        non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current element pointer(s) across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // (The IR value name reuses "dest.element" here; cosmetic only.)
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
/// Emit the lvalue for the shared (original) copy of a reduction item.
/// Thin wrapper around CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Compute and record the size of the N-th reduction item (bytes, and for
/// variably-modified types also the element count), then pre-emit the
/// variably-modified private type with its size expression bound.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the type alone determines the byte size; the
    // element-count slot stays null.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // NOTE(review): reads the pointee type off a typed pointer; this predates
  // opaque pointers and would need updating for them.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: elements = (UB - LB) + 1; bytes = elements * sizeof(T).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: bytes come from the type; elements = bytes / sizeof(T).
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed element count for the
  // duration of the type emission.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
865 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
866                                          llvm::Value *Size) {
867   const auto *PrivateVD =
868       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
869   QualType PrivateType = PrivateVD->getType();
870   if (!PrivateType->isVariablyModifiedType()) {
871     assert(!Size && !Sizes[N].second &&
872            "Size should be nullptr for non-variably modified reduction "
873            "items.");
874     return;
875   }
876   CodeGenFunction::OpaqueValueMapping OpaqueMap(
877       CGF,
878       cast<OpaqueValueExpr>(
879           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
880       RValue::get(Size));
881   CGF.EmitVariablyModifiedType(PrivateType);
882 }
883 
/// Emit initialization of the private copy of the N-th reduction item.
/// Dispatches between array initialization, a user-defined reduction (UDR)
/// initializer, and the private variable's own initializer. \p DefaultInit
/// is invoked first where required; a true return means it already
/// performed the initialization.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // UDR declaration for this reduction op, or null if it is a builtin op.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast private and shared addresses to the memory types of their decls.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or no private init at all).
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit did not handle it: emit the private variable's own
    // non-trivial initializer into the private storage.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
/// Dereference pointer/reference layers of \p BaseLV (loading through each)
/// until \p BaseTy matches \p ElTy, then return an lvalue for that address
/// element-cast to ElTy's memory type. BaseInfo/TBAA are carried over from
/// the final BaseLV.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one level of indirection: pointers and references are handled
    // with their respective lvalue-load helpers.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
958 
/// Wrap \p Addr back into the shape of \p BaseTy: for each pointer/reference
/// layer between BaseTy and ElTy, a temporary is created and chained so that
/// loading through the returned address (MostTopTmp) eventually reaches
/// Addr. If no indirection layers exist, Addr is returned directly with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection layer; each outer temporary stores the
    // address of the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;  // Remember the outermost temporary.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the target address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// If the N-th reduction item is an array section/subscript, rebase
/// \p PrivateAddr so that indexing it like the original base variable works:
/// the offset of the shared element from its base is applied (negatively,
/// via GEP with a pointer difference) to the private pointer, and the result
/// is wrapped back into the base variable's shape. Otherwise the address is
/// returned unchanged. Also records the base VarDecl in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the first element of the base variable's storage.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Distance (in elements) from the shared item back to the base begin.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in the base variable's pointer shape.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
/// The thread-id variable of an outlined region is a pointer (asserted to
/// be kmp_int32* elsewhere); load through it to get the lvalue of the
/// thread id itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1045 
/// Emit the body of an OpenMP region inside a terminate scope so that
/// exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  // Run the region's code-gen callback between push/popTerminate so any
  // throw inside the block terminates instead of unwinding out.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1060 
/// For task outlined regions the thread-id variable holds the value
/// directly (no pointer indirection — see the non-pointer assertion in
/// emitTaskOutlinedFunction), so its local storage is the lvalue.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1067 
/// Create an unnamed public field of type \p FieldTy, add it to \p DC
/// (a record being built for runtime structures) and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
1078 
/// Construct the OpenMP runtime support object: record the name separators
/// used by getName(), create the kmp_critical_name lock type ([8 x i32]),
/// initialize the OpenMPIRBuilder's types and load offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emit the combiner or initializer of a user-defined reduction as an
/// internal function. Note the parameter order: the "out" pointer is pushed
/// first, so the emitted signature is
///   void .omp_combiner./.omp_initializer.(Ty *omp_out, Ty *omp_in);
/// \param CombinerInitializer The combiner expression, or (for
///        CallInit-style initializers) the init call; may be null.
/// \param In/Out The omp_in/omp_orig and omp_out/omp_priv variables of the
///        declare-reduction decl, privatized onto the two parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  ASTContext &C = CGM.getContext();
  // Parameters are restrict-qualified pointers to the reduction type.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For a direct-init initializer, emit Out's own initializer into its
  // (privatized) storage before the optional initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
/// Emit (once) the combiner and optional initializer functions for the
/// user-defined reduction \p D and cache them in UDRMap. When emitted from
/// inside a function, the UDR is also recorded against that function so its
/// state can be cleaned up later.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For CallInit the init expression is emitted in the helper; for direct
    // init the helper emits the priv variable's own initializer instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// On construction a FinalizationInfo (with a cleanup callback) is pushed on
// the OpenMPIRBuilder's finalization stack; on destruction it is popped
// again, bracketing the emission of one region.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    // A null builder means the OpenMPIRBuilder is not in use; do nothing.
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pop the finalization entry pushed by the constructor (if any).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1254     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257   assert(ThreadIDVar->getType()->isPointerType() &&
1258          "thread id variable must be of type kmp_int32 *");
1259   CodeGenFunction CGF(CGM, true);
1260   bool HasCancel = false;
1261   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262     HasCancel = OPD->hasCancel();
1263   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266     HasCancel = OPSD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD =
1274                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275     HasCancel = OPFD->hasCancel();
1276   else if (const auto *OPFD =
1277                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278     HasCancel = OPFD->hasCancel();
1279 
1280   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281   //       parallel region to make cancellation barriers work properly.
1282   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285                                     HasCancel, OutlinedHelperName);
1286   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
/// Outline the 'parallel' captured region of \p D into a function using the
/// common parallel/teams outlining path.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1297 
/// Outline the 'teams' captured region of \p D into a function using the
/// common parallel/teams outlining path.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1305 
/// Outline the task (or taskloop) region of \p D into a function.
/// \param ThreadIDVar Captured thread-id variable; must NOT be a pointer
///        (tasks receive the id by value — contrast with parallel regions).
/// \param PartIDVar Part-id variable used by the untied-task state machine.
/// \param Tied If false, NumberOfParts is set to the number of generated
///        task parts on return.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task so execution
  // can resume at the next part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these task-related directives may carry a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
/// Create a global variable holding a constant struct of type \p Ty whose
/// fields are \p Data (padding slots zero-filled via buildStructValue).
/// Extra arguments are forwarded to
/// ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1385 
/// Build a constant struct of type \p Ty from \p Data and append it to the
/// aggregate being built in \p Parent (a ConstantInitBuilder aggregate
/// builder), instead of creating a standalone global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1397 
1398 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1399                                              bool AtCurrentPoint) {
1400   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1401   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1402 
1403   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1404   if (AtCurrentPoint) {
1405     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1406         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1407   } else {
1408     Elem.second.ServiceInsertPt =
1409         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1410     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1411   }
1412 }
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1452                                                 Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
/// Return the OpenMP thread id for the current function, caching it per
/// function so repeated queries do not re-emit runtime calls.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the thread-id parameter when the load is safe: either no
      // landing pads / C++ exceptions are involved, or both the current
      // insert point and the thread-id variable live in the entry block (so
      // the load cannot be skipped by an exceptional edge).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (entry block) so
  // the result dominates all uses; the guard restores the builder afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
/// Return the ident_t* type, as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1564   assert((IVSize == 32 || IVSize == 64) &&
1565          "IV size is not compatible with the omp runtime");
1566   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567                                             : "__kmpc_for_static_init_4u")
1568                                 : (IVSigned ? "__kmpc_for_static_init_8"
1569                                             : "__kmpc_for_static_init_8u");
1570   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572   llvm::Type *TypeParams[] = {
1573     getIdentTyPointerTy(),                     // loc
1574     CGM.Int32Ty,                               // tid
1575     CGM.Int32Ty,                               // schedtype
1576     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577     PtrTy,                                     // p_lower
1578     PtrTy,                                     // p_upper
1579     PtrTy,                                     // p_stride
1580     ITy,                                       // incr
1581     ITy                                        // chunk
1582   };
1583   auto *FnTy =
1584       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1585   return CGM.CreateRuntimeFunction(FnTy, Name);
1586 }
1587 
1588 llvm::FunctionCallee
1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1590   assert((IVSize == 32 || IVSize == 64) &&
1591          "IV size is not compatible with the omp runtime");
1592   StringRef Name =
1593       IVSize == 32
1594           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1595           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1596   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1597   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1598                                CGM.Int32Ty,           // tid
1599                                CGM.Int32Ty,           // schedtype
1600                                ITy,                   // lower
1601                                ITy,                   // upper
1602                                ITy,                   // stride
1603                                ITy                    // chunk
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1617           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1618   llvm::Type *TypeParams[] = {
1619       getIdentTyPointerTy(), // loc
1620       CGM.Int32Ty,           // tid
1621   };
1622   auto *FnTy =
1623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1624   return CGM.CreateRuntimeFunction(FnTy, Name);
1625 }
1626 
1627 llvm::FunctionCallee
1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1629   assert((IVSize == 32 || IVSize == 64) &&
1630          "IV size is not compatible with the omp runtime");
1631   StringRef Name =
1632       IVSize == 32
1633           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1634           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1636   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1637   llvm::Type *TypeParams[] = {
1638     getIdentTyPointerTy(),                     // loc
1639     CGM.Int32Ty,                               // tid
1640     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1641     PtrTy,                                     // p_lower
1642     PtrTy,                                     // p_upper
1643     PtrTy                                      // p_stride
1644   };
1645   auto *FnTy =
1646       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1647   return CGM.CreateRuntimeFunction(FnTy, Name);
1648 }
1649 
1650 /// Obtain information that uniquely identifies a target entry. This
1651 /// consists of the file and device IDs as well as line number associated with
1652 /// the relevant entry source location.
1653 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1654                                      unsigned &DeviceID, unsigned &FileID,
1655                                      unsigned &LineNum) {
1656   SourceManager &SM = C.getSourceManager();
1657 
1658   // The loc should be always valid and have a file ID (the user cannot use
1659   // #pragma directives in macros)
1660 
1661   assert(Loc.isValid() && "Source location is expected to be always valid.");
1662 
1663   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1664   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665 
1666   llvm::sys::fs::UniqueID ID;
1667   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1668     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1669     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1671       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1672           << PLoc.getFilename() << EC.message();
1673   }
1674 
1675   DeviceID = ID.getDevice();
1676   FileID = ID.getFile();
1677   LineNum = PLoc.getLine();
1678 }
1679 
/// Return the address of the reference pointer ("..._decl_tgt_ref_ptr")
/// through which a declare-target 'link' variable (or a 'to' variable under
/// unified shared memory) is accessed, creating and registering it on first
/// use. Returns an invalid Address when no indirection is required.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no offloading happens, so no indirection is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer name: "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr".
    // The file id component keeps internal-linkage variables from different
    // translation units distinct.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use in this module: create the pointer variable and register it
      // with the offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device the runtime patches it at load time.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1718 
1719 llvm::Constant *
1720 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1721   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1722          !CGM.getContext().getTargetInfo().isTLSSupported());
1723   // Lookup the entry, lazily creating it if necessary.
1724   std::string Suffix = getName({"cache", ""});
1725   return getOrCreateInternalVariable(
1726       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1727 }
1728 
1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1730                                                 const VarDecl *VD,
1731                                                 Address VDAddr,
1732                                                 SourceLocation Loc) {
1733   if (CGM.getLangOpts().OpenMPUseTLS &&
1734       CGM.getContext().getTargetInfo().isTLSSupported())
1735     return VDAddr;
1736 
1737   llvm::Type *VarTy = VDAddr.getElementType();
1738   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1739                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1740                                                        CGM.Int8PtrTy),
1741                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1742                          getOrCreateThreadPrivateCache(VD)};
1743   return Address(CGF.EmitRuntimeCall(
1744                      OMPBuilder.getOrCreateRuntimeFunction(
1745                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1746                      Args),
1747                  VDAddr.getAlignment());
1748 }
1749 
1750 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1751     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1752     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1753   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1754   // library.
1755   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1756   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1757                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1758                       OMPLoc);
1759   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1760   // to register constructor/destructor for variable.
1761   llvm::Value *Args[] = {
1762       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1763       Ctor, CopyCtor, Dtor};
1764   CGF.EmitRuntimeCall(
1765       OMPBuilder.getOrCreateRuntimeFunction(
1766           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1767       Args);
1768 }
1769 
/// Emit, if required, the ctor/dtor helper functions for a threadprivate
/// variable and register them with the runtime. Returns the synthesized
/// "__omp_threadprivate_init_" function when no CodeGenFunction is supplied
/// (so the caller can add it to the global init list), otherwise nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the variable needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the registration only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // The ctor receives the destination address as its void* argument.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime contract requires the ctor to return the same pointer it
      // was handed.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor helpers are passed to the runtime as typed nulls.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated
      // "__omp_threadprivate_init_" function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1889 
/// Emit the ctor/dtor offload entries for a declare-target variable
/// definition so host and device agree on its initialization/destruction.
/// Returns true when, on the device, the caller must not emit the variable's
/// regular initializer itself.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading targets are configured at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are accessed
  // through a reference pointer instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder is needed; its address serves as the
      // unique ID matching the device-side entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder; its address is the matching entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2004 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name, creating it on first use. Uses native TLS
/// when available, otherwise goes through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Backing global named "<Name>.artificial.".
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Native TLS: mark the global thread-local and return it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime-managed TLS: call
  //   __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  // with a dedicated "<Name>.artificial..cache." cache variable.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns a void*; cast it back to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2035 
/// Emit an if/else construct for an OpenMP 'if' clause: \p ThenGen when
/// \p Cond is true, \p ElseGen otherwise. Constant conditions are folded so
/// only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // (The ApplyDebugLocation temporary is destroyed at the end of the full
  // expression; its effect is intentionally limited to this one statement.)
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2074 
/// Emits code for an OpenMP 'parallel' region. The parallel arm forks
/// \p OutlinedFn via __kmpc_fork_call; the serial arm (taken when \p IfCond is
/// false at runtime) brackets a direct call to \p OutlinedFn with
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel. When
/// \p IfCond is null, only the fork path is emitted.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if clause: the parallel path is taken unconditionally.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2145 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Fast path: reuse the gtid parameter of the enclosing outlined function.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Slow path: query the runtime and spill the id into a stack temporary so
  // the caller gets an address (kmp_int32 *), not a plain value.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
2168 
/// Returns (creating on first request) a module-private global variable named
/// \p Name of type \p Ty, zero-initialized, with common linkage. Results are
/// cached in InternalVars so every request for the same name yields the same
/// global; the cached global must have been created with the same type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Materialize the (possibly lazily-concatenated) Twine into a flat string
  // usable as a map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: return the previously created global.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create the global, keyed by the map-owned copy of the name.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2188 
2189 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2190   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2191   std::string Name = getName({Prefix, "var"});
2192   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2193 }
2194 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits a runtime "enter" call before the region body and an "exit" call
/// after it. When \p Conditional is true, the enter call's return value guards
/// the body: the body (and the exit call) only run if the call returned
/// non-zero, and the caller must invoke Done() after the region to close the
/// conditional structure.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Join block for the conditional form; set by Enter(), consumed by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emits the enter call and, in conditional mode, the branch guarding the
  /// region body.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Closes the conditional structure opened by Enter(); call only in
  /// conditional mode, after the region body has been emitted.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emits the exit call at the end of the region body.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2233 
/// Emits an OpenMP 'critical' region bracketed by __kmpc_critical (or
/// __kmpc_critical_with_hint when a 'hint' clause is present) and
/// __kmpc_end_critical, all keyed on the region's named lock variable.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // Only the enter call takes the extra hint argument.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2263 
/// Emits an OpenMP 'master' region: the body runs only when __kmpc_master
/// returns non-zero, followed by __kmpc_end_master. The conditional action
/// requires the final Action.Done() call to close the guarding branch.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
2286 
/// Emits an OpenMP 'masked' region: the body runs only when __kmpc_masked
/// returns non-zero, followed by __kmpc_end_masked. A missing 'filter' clause
/// defaults to thread 0 (i.e. the same behavior as 'master').
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call does not take the filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
2315 
/// Emits an OpenMP 'taskyield' directive, either through the OpenMPIRBuilder
/// (when enabled) or as a direct __kmpc_omp_taskyield runtime call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Inside an untied task, the yield is a resumption point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2335 
/// Emits an OpenMP 'taskgroup' region bracketed by __kmpc_taskgroup and
/// __kmpc_end_taskgroup (unconditional, unlike master/masked/single).
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2355 
2356 /// Given an array of pointers to variables, project the address of a
2357 /// given variable.
2358 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2359                                       unsigned Index, const VarDecl *Var) {
2360   // Pull out the pointer to the variable.
2361   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2362   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2363 
2364   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2365   Addr = CGF.Builder.CreateElementBitCast(
2366       Addr, CGF.ConvertTypeForMem(Var->getType()));
2367   return Addr;
2368 }
2369 
/// Emits a helper function with signature
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
/// that reinterprets both arguments as \p ArgsType (an array of void*
/// pointers, one per copyprivate variable) and performs the element-wise
/// copies described by \p AssignmentOps. Used by the 'single' construct's
/// copyprivate handling.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2423 
/// Emits an OpenMP 'single' region: the body runs only when __kmpc_single
/// returns non-zero, followed by __kmpc_end_single. When copyprivate clauses
/// are present, a did_it flag records which thread executed the region and a
/// trailing __kmpc_copyprivate call broadcasts the listed variables to the
/// other threads via a generated copy helper.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the guarded arm, so only the executing thread sets it.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2511 
2512 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2513                                         const RegionCodeGenTy &OrderedOpGen,
2514                                         SourceLocation Loc, bool IsThreads) {
2515   if (!CGF.HaveInsertPoint())
2516     return;
2517   // __kmpc_ordered(ident_t *, gtid);
2518   // OrderedOpGen();
2519   // __kmpc_end_ordered(ident_t *, gtid);
2520   // Prepare arguments and build a call to __kmpc_ordered
2521   if (IsThreads) {
2522     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2523     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2524                               CGM.getModule(), OMPRTL___kmpc_ordered),
2525                           Args,
2526                           OMPBuilder.getOrCreateRuntimeFunction(
2527                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2528                           Args);
2529     OrderedOpGen.setAction(Action);
2530     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2531     return;
2532   }
2533   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2534 }
2535 
2536 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2537   unsigned Flags;
2538   if (Kind == OMPD_for)
2539     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2540   else if (Kind == OMPD_sections)
2541     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2542   else if (Kind == OMPD_single)
2543     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2544   else if (Kind == OMPD_barrier)
2545     Flags = OMP_IDENT_BARRIER_EXPL;
2546   else
2547     Flags = OMP_IDENT_BARRIER_IMPL;
2548   return Flags;
2549 }
2550 
2551 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2552     CodeGenFunction &CGF, const OMPLoopDirective &S,
2553     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2554   // Check if the loop directive is actually a doacross loop directive. In this
2555   // case choose static, 1 schedule.
2556   if (llvm::any_of(
2557           S.getClausesOfKind<OMPOrderedClause>(),
2558           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2559     ScheduleKind = OMPC_SCHEDULE_static;
2560     // Chunk size is 1 in this case.
2561     llvm::APInt ChunkSize(32, 1);
2562     ChunkExpr = IntegerLiteral::Create(
2563         CGF.getContext(), ChunkSize,
2564         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2565         SourceLocation());
2566   }
2567 }
2568 
/// Emits a barrier for directive \p Kind. Inside a cancellable region (unless
/// \p ForceSimpleCall) the cancellation-aware __kmpc_cancel_barrier is used
/// and, when \p EmitChecks is set, its result is tested to branch out of the
/// construct on cancellation; otherwise a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Simple, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2618 
2619 /// Map the OpenMP loop schedule to the runtime enumeration.
2620 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2621                                           bool Chunked, bool Ordered) {
2622   switch (ScheduleKind) {
2623   case OMPC_SCHEDULE_static:
2624     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2625                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2626   case OMPC_SCHEDULE_dynamic:
2627     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2628   case OMPC_SCHEDULE_guided:
2629     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2630   case OMPC_SCHEDULE_runtime:
2631     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2632   case OMPC_SCHEDULE_auto:
2633     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2634   case OMPC_SCHEDULE_unknown:
2635     assert(!Chunked && "chunk was specified but schedule kind not known");
2636     return Ordered ? OMP_ord_static : OMP_sch_static;
2637   }
2638   llvm_unreachable("Unexpected runtime schedule");
2639 }
2640 
2641 /// Map the OpenMP distribute schedule to the runtime enumeration.
2642 static OpenMPSchedType
2643 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2644   // only static is allowed for dist_schedule
2645   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2646 }
2647 
2648 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2649                                          bool Chunked) const {
2650   OpenMPSchedType Schedule =
2651       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2652   return Schedule == OMP_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(
2656     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2657   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2658   return Schedule == OMP_dist_sch_static;
2659 }
2660 
2661 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2662                                       bool Chunked) const {
2663   OpenMPSchedType Schedule =
2664       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2665   return Schedule == OMP_sch_static_chunked;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(
2669     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2670   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2671   return Schedule == OMP_dist_sch_static_chunked;
2672 }
2673 
2674 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2675   OpenMPSchedType Schedule =
2676       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2677   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2678   return Schedule != OMP_sch_static;
2679 }
2680 
2681 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2682                                   OpenMPScheduleClauseModifier M1,
2683                                   OpenMPScheduleClauseModifier M2) {
2684   int Modifier = 0;
2685   switch (M1) {
2686   case OMPC_SCHEDULE_MODIFIER_monotonic:
2687     Modifier = OMP_sch_modifier_monotonic;
2688     break;
2689   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2690     Modifier = OMP_sch_modifier_nonmonotonic;
2691     break;
2692   case OMPC_SCHEDULE_MODIFIER_simd:
2693     if (Schedule == OMP_sch_static_chunked)
2694       Schedule = OMP_sch_static_balanced_chunked;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_last:
2697   case OMPC_SCHEDULE_MODIFIER_unknown:
2698     break;
2699   }
2700   switch (M2) {
2701   case OMPC_SCHEDULE_MODIFIER_monotonic:
2702     Modifier = OMP_sch_modifier_monotonic;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2705     Modifier = OMP_sch_modifier_nonmonotonic;
2706     break;
2707   case OMPC_SCHEDULE_MODIFIER_simd:
2708     if (Schedule == OMP_sch_static_chunked)
2709       Schedule = OMP_sch_static_balanced_chunked;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_last:
2712   case OMPC_SCHEDULE_MODIFIER_unknown:
2713     break;
2714   }
2715   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2716   // If the static schedule kind is specified or if the ordered clause is
2717   // specified, and if the nonmonotonic modifier is not specified, the effect is
2718   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2719   // modifier is specified, the effect is as if the nonmonotonic modifier is
2720   // specified.
2721   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2722     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2723           Schedule == OMP_sch_static_balanced_chunked ||
2724           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2725           Schedule == OMP_dist_sch_static_chunked ||
2726           Schedule == OMP_dist_sch_static))
2727       Modifier = OMP_sch_modifier_nonmonotonic;
2728   }
2729   return Schedule | Modifier;
2730 }
2731 
2732 void CGOpenMPRuntime::emitForDispatchInit(
2733     CodeGenFunction &CGF, SourceLocation Loc,
2734     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2735     bool Ordered, const DispatchRTInput &DispatchValues) {
2736   if (!CGF.HaveInsertPoint())
2737     return;
2738   OpenMPSchedType Schedule = getRuntimeSchedule(
2739       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2740   assert(Ordered ||
2741          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2742           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2743           Schedule != OMP_sch_static_balanced_chunked));
2744   // Call __kmpc_dispatch_init(
2745   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2746   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2747   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2748 
2749   // If the Chunk was not specified in the clause - use default value 1.
2750   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2751                                             : CGF.Builder.getIntN(IVSize, 1);
2752   llvm::Value *Args[] = {
2753       emitUpdateLocation(CGF, Loc),
2754       getThreadID(CGF, Loc),
2755       CGF.Builder.getInt32(addMonoNonMonoModifier(
2756           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2757       DispatchValues.LB,                                     // Lower
2758       DispatchValues.UB,                                     // Upper
2759       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2760       Chunk                                                  // Chunk
2761   };
2762   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2763 }
2764 
2765 static void emitForStaticInitCall(
2766     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2767     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2768     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2769     const CGOpenMPRuntime::StaticRTInput &Values) {
2770   if (!CGF.HaveInsertPoint())
2771     return;
2772 
2773   assert(!Values.Ordered);
2774   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2775          Schedule == OMP_sch_static_balanced_chunked ||
2776          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2777          Schedule == OMP_dist_sch_static ||
2778          Schedule == OMP_dist_sch_static_chunked);
2779 
2780   // Call __kmpc_for_static_init(
2781   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2782   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2783   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2784   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2785   llvm::Value *Chunk = Values.Chunk;
2786   if (Chunk == nullptr) {
2787     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2788             Schedule == OMP_dist_sch_static) &&
2789            "expected static non-chunked schedule");
2790     // If the Chunk was not specified in the clause - use default value 1.
2791     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2792   } else {
2793     assert((Schedule == OMP_sch_static_chunked ||
2794             Schedule == OMP_sch_static_balanced_chunked ||
2795             Schedule == OMP_ord_static_chunked ||
2796             Schedule == OMP_dist_sch_static_chunked) &&
2797            "expected static chunked schedule");
2798   }
2799   llvm::Value *Args[] = {
2800       UpdateLocation,
2801       ThreadId,
2802       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2803                                                   M2)), // Schedule type
2804       Values.IL.getPointer(),                           // &isLastIter
2805       Values.LB.getPointer(),                           // &LB
2806       Values.UB.getPointer(),                           // &UB
2807       Values.ST.getPointer(),                           // &Stride
2808       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2809       Chunk                                             // Chunk
2810   };
2811   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2812 }
2813 
2814 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2815                                         SourceLocation Loc,
2816                                         OpenMPDirectiveKind DKind,
2817                                         const OpenMPScheduleTy &ScheduleKind,
2818                                         const StaticRTInput &Values) {
2819   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2820       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2821   assert(isOpenMPWorksharingDirective(DKind) &&
2822          "Expected loop-based or sections-based directive.");
2823   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2824                                              isOpenMPLoopDirective(DKind)
2825                                                  ? OMP_IDENT_WORK_LOOP
2826                                                  : OMP_IDENT_WORK_SECTIONS);
2827   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2828   llvm::FunctionCallee StaticInitFunction =
2829       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2830   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2832                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2833 }
2834 
2835 void CGOpenMPRuntime::emitDistributeStaticInit(
2836     CodeGenFunction &CGF, SourceLocation Loc,
2837     OpenMPDistScheduleClauseKind SchedKind,
2838     const CGOpenMPRuntime::StaticRTInput &Values) {
2839   OpenMPSchedType ScheduleNum =
2840       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2841   llvm::Value *UpdatedLocation =
2842       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2843   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844   llvm::FunctionCallee StaticInitFunction =
2845       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2846   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2847                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2848                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2849 }
2850 
2851 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2852                                           SourceLocation Loc,
2853                                           OpenMPDirectiveKind DKind) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {
2858       emitUpdateLocation(CGF, Loc,
2859                          isOpenMPDistributeDirective(DKind)
2860                              ? OMP_IDENT_WORK_DISTRIBUTE
2861                              : isOpenMPLoopDirective(DKind)
2862                                    ? OMP_IDENT_WORK_LOOP
2863                                    : OMP_IDENT_WORK_SECTIONS),
2864       getThreadID(CGF, Loc)};
2865   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868                       Args);
2869 }
2870 
2871 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2872                                                  SourceLocation Loc,
2873                                                  unsigned IVSize,
2874                                                  bool IVSigned) {
2875   if (!CGF.HaveInsertPoint())
2876     return;
2877   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2878   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2879   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2880 }
2881 
2882 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2883                                           SourceLocation Loc, unsigned IVSize,
2884                                           bool IVSigned, Address IL,
2885                                           Address LB, Address UB,
2886                                           Address ST) {
2887   // Call __kmpc_dispatch_next(
2888   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2889   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2890   //          kmp_int[32|64] *p_stride);
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc),
2893       getThreadID(CGF, Loc),
2894       IL.getPointer(), // &isLastIter
2895       LB.getPointer(), // &Lower
2896       UB.getPointer(), // &Upper
2897       ST.getPointer()  // &Stride
2898   };
2899   llvm::Value *Call =
2900       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2901   return CGF.EmitScalarConversion(
2902       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2903       CGF.getContext().BoolTy, Loc);
2904 }
2905 
2906 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2907                                            llvm::Value *NumThreads,
2908                                            SourceLocation Loc) {
2909   if (!CGF.HaveInsertPoint())
2910     return;
2911   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2912   llvm::Value *Args[] = {
2913       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2914       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2915   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2916                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2917                       Args);
2918 }
2919 
2920 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2921                                          ProcBindKind ProcBind,
2922                                          SourceLocation Loc) {
2923   if (!CGF.HaveInsertPoint())
2924     return;
2925   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2926   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2927   llvm::Value *Args[] = {
2928       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2929       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2930   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2931                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2932                       Args);
2933 }
2934 
2935 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2936                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2937   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2938     OMPBuilder.createFlush(CGF.Builder);
2939   } else {
2940     if (!CGF.HaveInsertPoint())
2941       return;
2942     // Build call void __kmpc_flush(ident_t *loc)
2943     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2944                             CGM.getModule(), OMPRTL___kmpc_flush),
2945                         emitUpdateLocation(CGF, Loc));
2946   }
2947 }
2948 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order of these enumerators must match the field order of
/// the kmp_task_t record type built elsewhere in this file — confirm before
/// reordering or inserting values.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2974 
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2976   return OffloadEntriesTargetRegion.empty() &&
2977          OffloadEntriesDeviceGlobalVar.empty();
2978 }
2979 
2980 /// Initialize target region entry.
2981 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2982     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2983                                     StringRef ParentName, unsigned LineNum,
2984                                     unsigned Order) {
2985   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2986                                              "only required for the device "
2987                                              "code generation.");
2988   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2989       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2990                                    OMPTargetRegionEntryTargetRegion);
2991   ++OffloadingEntriesNum;
2992 }
2993 
2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2995     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2996                                   StringRef ParentName, unsigned LineNum,
2997                                   llvm::Constant *Addr, llvm::Constant *ID,
2998                                   OMPTargetRegionEntryKind Flags) {
2999   // If we are emitting code for a target, the entry is already initialized,
3000   // only has to be registered.
3001   if (CGM.getLangOpts().OpenMPIsDevice) {
3002     // This could happen if the device compilation is invoked standalone.
3003     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3004       return;
3005     auto &Entry =
3006         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3007     Entry.setAddress(Addr);
3008     Entry.setID(ID);
3009     Entry.setFlags(Flags);
3010   } else {
3011     if (Flags ==
3012             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3013         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3014                                  /*IgnoreAddressId*/ true))
3015       return;
3016     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3017            "Target region entry already registered!");
3018     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3019     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3020     ++OffloadingEntriesNum;
3021   }
3022 }
3023 
3024 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3025     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3026     bool IgnoreAddressId) const {
3027   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3028   if (PerDevice == OffloadEntriesTargetRegion.end())
3029     return false;
3030   auto PerFile = PerDevice->second.find(FileID);
3031   if (PerFile == PerDevice->second.end())
3032     return false;
3033   auto PerParentName = PerFile->second.find(ParentName);
3034   if (PerParentName == PerFile->second.end())
3035     return false;
3036   auto PerLine = PerParentName->second.find(LineNum);
3037   if (PerLine == PerParentName->second.end())
3038     return false;
3039   // Fail if this entry is already registered.
3040   if (!IgnoreAddressId &&
3041       (PerLine->second.getAddress() || PerLine->second.getID()))
3042     return false;
3043   return true;
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3047     const OffloadTargetRegionEntryInfoActTy &Action) {
3048   // Scan all target region entries and perform the provided action.
3049   for (const auto &D : OffloadEntriesTargetRegion)
3050     for (const auto &F : D.second)
3051       for (const auto &P : F.second)
3052         for (const auto &L : P.second)
3053           Action(D.first, F.first, P.first(), L.first, L.second);
3054 }
3055 
3056 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3057     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3058                                        OMPTargetGlobalVarEntryKind Flags,
3059                                        unsigned Order) {
3060   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3061                                              "only required for the device "
3062                                              "code generation.");
3063   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3064   ++OffloadingEntriesNum;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3068     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3069                                      CharUnits VarSize,
3070                                      OMPTargetGlobalVarEntryKind Flags,
3071                                      llvm::GlobalValue::LinkageTypes Linkage) {
3072   if (CGM.getLangOpts().OpenMPIsDevice) {
3073     // This could happen if the device compilation is invoked standalone.
3074     if (!hasDeviceGlobalVarEntryInfo(VarName))
3075       return;
3076     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3077     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3078       if (Entry.getVarSize().isZero()) {
3079         Entry.setVarSize(VarSize);
3080         Entry.setLinkage(Linkage);
3081       }
3082       return;
3083     }
3084     Entry.setVarSize(VarSize);
3085     Entry.setLinkage(Linkage);
3086     Entry.setAddress(Addr);
3087   } else {
3088     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3089       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3090       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3091              "Entry not initialized!");
3092       if (Entry.getVarSize().isZero()) {
3093         Entry.setVarSize(VarSize);
3094         Entry.setLinkage(Linkage);
3095       }
3096       return;
3097     }
3098     OffloadEntriesDeviceGlobalVar.try_emplace(
3099         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3100     ++OffloadingEntriesNum;
3101   }
3102 }
3103 
3104 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3105     actOnDeviceGlobalVarEntriesInfo(
3106         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3107   // Scan all target region entries and perform the provided action.
3108   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3109     Action(E.getKey(), E.getValue());
3110 }
3111 
3112 void CGOpenMPRuntime::createOffloadEntry(
3113     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3114     llvm::GlobalValue::LinkageTypes Linkage) {
3115   StringRef Name = Addr->getName();
3116   llvm::Module &M = CGM.getModule();
3117   llvm::LLVMContext &C = M.getContext();
3118 
3119   // Create constant string with the name.
3120   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3121 
3122   std::string StringName = getName({"omp_offloading", "entry_name"});
3123   auto *Str = new llvm::GlobalVariable(
3124       M, StrPtrInit->getType(), /*isConstant=*/true,
3125       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3126   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3127 
3128   llvm::Constant *Data[] = {
3129       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3130       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3131       llvm::ConstantInt::get(CGM.SizeTy, Size),
3132       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3133       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3134   std::string EntryName = getName({"omp_offloading", "entry", ""});
3135   llvm::GlobalVariable *Entry = createGlobalStruct(
3136       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3137       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3138 
3139   // The entry has to be created in the section the linker expects it to be.
3140   Entry->setSection("omp_offloading_entries");
3141 }
3142 
/// Emit the "omp_offload.info" module-level metadata describing every offload
/// entry (target regions and declare-target globals) and create the matching
/// structures in the "omp_offloading_entries" section. The metadata produced
/// here is read back by loadOffloadInfoMetadata() during device compilation.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected indexed by their creation order so that the host
  // and device sides agree on the entry numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the entry's
        // device/file unique IDs against the files the SourceManager knows.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Create the offload-entry structs in creation order, diagnosing entries
  // that were never completed with a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With unified shared memory on the device, 'to' entries are skipped.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only materialized on the host side.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3316 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only the device-side compilation consumes host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to do if no host IR file was provided on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local, throw-away context; only the named
  // metadata node is needed from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer/string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout matches
    // the emitter lambdas in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3385 
3386 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3387   if (!KmpRoutineEntryPtrTy) {
3388     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3389     ASTContext &C = CGM.getContext();
3390     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3391     FunctionProtoType::ExtProtoInfo EPI;
3392     KmpRoutineEntryPtrQTy = C.getPointerType(
3393         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3394     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3395   }
3396 }
3397 
3398 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3399   // Make sure the type of the entry is already created. This is the type we
3400   // have to create:
3401   // struct __tgt_offload_entry{
3402   //   void      *addr;       // Pointer to the offload entry info.
3403   //                          // (function or global)
3404   //   char      *name;       // Name of the function or global.
3405   //   size_t     size;       // Size of the entry info (0 if it a function).
3406   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3407   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3408   // };
3409   if (TgtOffloadEntryQTy.isNull()) {
3410     ASTContext &C = CGM.getContext();
3411     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3412     RD->startDefinition();
3413     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3414     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3415     addFieldToRecordDecl(C, RD, C.getSizeType());
3416     addFieldToRecordDecl(
3417         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3418     addFieldToRecordDecl(
3419         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3420     RD->completeDefinition();
3421     RD->addAttr(PackedAttr::CreateImplicit(C));
3422     TgtOffloadEntryQTy = C.getRecordType(RD);
3423   }
3424   return TgtOffloadEntryQTy;
3425 }
3426 
3427 namespace {
3428 struct PrivateHelpersTy {
3429   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3430                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3431       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3432         PrivateElemInit(PrivateElemInit) {}
3433   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3434   const Expr *OriginalRef = nullptr;
3435   const VarDecl *Original = nullptr;
3436   const VarDecl *PrivateCopy = nullptr;
3437   const VarDecl *PrivateElemInit = nullptr;
3438   bool isLocalPrivate() const {
3439     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3440   }
3441 };
3442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3443 } // anonymous namespace
3444 
3445 static bool isAllocatableDecl(const VarDecl *VD) {
3446   const VarDecl *CVD = VD->getCanonicalDecl();
3447   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3448     return false;
3449   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3450   // Use the default allocation.
3451   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3452             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3453            !AA->getAllocator());
3454 }
3455 
3456 static RecordDecl *
3457 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3458   if (!Privates.empty()) {
3459     ASTContext &C = CGM.getContext();
3460     // Build struct .kmp_privates_t. {
3461     //         /*  private vars  */
3462     //       };
3463     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3464     RD->startDefinition();
3465     for (const auto &Pair : Privates) {
3466       const VarDecl *VD = Pair.second.Original;
3467       QualType Type = VD->getType().getNonReferenceType();
3468       // If the private variable is a local variable with lvalue ref type,
3469       // allocate the pointer instead of the pointee type.
3470       if (Pair.second.isLocalPrivate()) {
3471         if (VD->getType()->isLValueReferenceType())
3472           Type = C.getPointerType(Type);
3473         if (isAllocatableDecl(VD))
3474           Type = C.getPointerType(Type);
3475       }
3476       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3477       if (VD->hasAttrs()) {
3478         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3479              E(VD->getAttrs().end());
3480              I != E; ++I)
3481           FD->addAttr(*I);
3482       }
3483     }
3484     RD->completeDefinition();
3485     return RD;
3486   }
3487   return nullptr;
3488 }
3489 
3490 static RecordDecl *
3491 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3492                          QualType KmpInt32Ty,
3493                          QualType KmpRoutineEntryPointerQTy) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t {
3496   //         void *              shareds;
3497   //         kmp_routine_entry_t routine;
3498   //         kmp_int32           part_id;
3499   //         kmp_cmplrdata_t data1;
3500   //         kmp_cmplrdata_t data2;
3501   // For taskloops additional fields:
3502   //         kmp_uint64          lb;
3503   //         kmp_uint64          ub;
3504   //         kmp_int64           st;
3505   //         kmp_int32           liter;
3506   //         void *              reductions;
3507   //       };
3508   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3509   UD->startDefinition();
3510   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3511   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3512   UD->completeDefinition();
3513   QualType KmpCmplrdataTy = C.getRecordType(UD);
3514   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3515   RD->startDefinition();
3516   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3517   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3518   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3521   if (isOpenMPTaskLoopDirective(Kind)) {
3522     QualType KmpUInt64Ty =
3523         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3524     QualType KmpInt64Ty =
3525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3529     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3530     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3531   }
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
3536 static RecordDecl *
3537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3538                                      ArrayRef<PrivateDataTy> Privates) {
3539   ASTContext &C = CGM.getContext();
3540   // Build struct kmp_task_t_with_privates {
3541   //         kmp_task_t task_data;
3542   //         .kmp_privates_t. privates;
3543   //       };
3544   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3545   RD->startDefinition();
3546   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3547   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3548     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3549   RD->completeDefinition();
3550   return RD;
3551 }
3552 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The entry takes two parameters: the global thread id and the pointer to
  // the kmp_task_t_with_privates descriptor.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  // Create the ".omp_task_entry." function with internal linkage.
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task descriptor argument; Base is an lvalue for the
  // embedded kmp_task_t (first field of the wrapper record).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // The part_id field is passed by address.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the outlined task
  // function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates record, or a null pointer when the
  // wrapper record has no privates field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb, ub, st, liter and the
  // reductions pointer, each loaded from the kmp_task_t descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0 (see the \code example above).
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3667 
/// Emit the ".omp_task_destructor." helper. It has the same parameter list
/// as a task entry (kmp_int32 gtid, task descriptor pointer), returns
/// kmp_int32, and runs the destructors of every private field in the task's
/// privates record that has a destructed type.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the descriptor argument and address its privates record
  // (the second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field whose type requires
  // destruction; the cleanups run when the function is finished.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3716 
3717 /// Emit a privates mapping function for correct handling of private and
3718 /// firstprivate variables.
3719 /// \code
3720 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3721 /// **noalias priv1,...,  <tyn> **noalias privn) {
3722 ///   *priv1 = &.privates.priv1;
3723 ///   ...;
3724 ///   *privn = &.privates.privn;
3725 /// }
3726 /// \endcode
3727 static llvm::Value *
3728 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3729                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3730                                ArrayRef<PrivateDataTy> Privates) {
3731   ASTContext &C = CGM.getContext();
3732   FunctionArgList Args;
3733   ImplicitParamDecl TaskPrivatesArg(
3734       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3735       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3736       ImplicitParamDecl::Other);
3737   Args.push_back(&TaskPrivatesArg);
3738   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3739   unsigned Counter = 1;
3740   for (const Expr *E : Data.PrivateVars) {
3741     Args.push_back(ImplicitParamDecl::Create(
3742         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3743         C.getPointerType(C.getPointerType(E->getType()))
3744             .withConst()
3745             .withRestrict(),
3746         ImplicitParamDecl::Other));
3747     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3748     PrivateVarsPos[VD] = Counter;
3749     ++Counter;
3750   }
3751   for (const Expr *E : Data.FirstprivateVars) {
3752     Args.push_back(ImplicitParamDecl::Create(
3753         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754         C.getPointerType(C.getPointerType(E->getType()))
3755             .withConst()
3756             .withRestrict(),
3757         ImplicitParamDecl::Other));
3758     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3759     PrivateVarsPos[VD] = Counter;
3760     ++Counter;
3761   }
3762   for (const Expr *E : Data.LastprivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const VarDecl *VD : Data.PrivateLocals) {
3774     QualType Ty = VD->getType().getNonReferenceType();
3775     if (VD->getType()->isLValueReferenceType())
3776       Ty = C.getPointerType(Ty);
3777     if (isAllocatableDecl(VD))
3778       Ty = C.getPointerType(Ty);
3779     Args.push_back(ImplicitParamDecl::Create(
3780         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3782         ImplicitParamDecl::Other));
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   const auto &TaskPrivatesMapFnInfo =
3787       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788   llvm::FunctionType *TaskPrivatesMapTy =
3789       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790   std::string Name =
3791       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792   auto *TaskPrivatesMap = llvm::Function::Create(
3793       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794       &CGM.getModule());
3795   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796                                     TaskPrivatesMapFnInfo);
3797   if (CGM.getLangOpts().Optimize) {
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801   }
3802   CodeGenFunction CGF(CGM);
3803   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805 
3806   // *privi = &.privates.privi;
3807   LValue Base = CGF.EmitLoadOfPointerLValue(
3808       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809       TaskPrivatesArg.getType()->castAs<PointerType>());
3810   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811   Counter = 0;
3812   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815     LValue RefLVal =
3816         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820     ++Counter;
3821   }
3822   CGF.FinishFunction();
3823   return TaskPrivatesMap;
3824 }
3825 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the task's shareds block, used as the
///        source for firstprivate copies (may be invalid when not needed).
/// \param ForDup true when emitting inside the task duplication function,
///        false when emitting the initial task construction.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with the Privates
  // list; the two were built in the same order.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function (ForDup) only non-trivial CXXConstructExpr
    // initializers are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Duplication: the source value lives in the shareds block of the
          // task being duplicated (SrcBase, set up above).
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda or block: emit through
          // the original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the init element so it aliases
          // the shared value, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private copy: just run its initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3947 
3948 /// Check if duplication function is required for taskloops.
3949 static bool checkInitIsRequired(CodeGenFunction &CGF,
3950                                 ArrayRef<PrivateDataTy> Privates) {
3951   bool InitRequired = false;
3952   for (const PrivateDataTy &Pair : Privates) {
3953     if (Pair.second.isLocalPrivate())
3954       continue;
3955     const VarDecl *VD = Pair.second.PrivateCopy;
3956     const Expr *Init = VD->getAnyInitializer();
3957     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3958                                     !CGF.isTrivialInitializer(Init));
3959     if (InitRequired)
3960       break;
3961   }
3962   return InitRequired;
3963 }
3964 
3965 
3966 /// Emit task_dup function (for initialization of
3967 /// private/firstprivate/lastprivate vars and last_iter flag)
3968 /// \code
3969 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3970 /// lastpriv) {
3971 /// // setup lastprivate flag
3972 ///    task_dst->last = lastpriv;
3973 /// // could be constructor calls here...
3974 /// }
3975 /// \endcode
3976 static llvm::Value *
3977 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3978                     const OMPExecutableDirective &D,
3979                     QualType KmpTaskTWithPrivatesPtrQTy,
3980                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3981                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3982                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3983                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3984   ASTContext &C = CGM.getContext();
3985   FunctionArgList Args;
3986   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3987                            KmpTaskTWithPrivatesPtrQTy,
3988                            ImplicitParamDecl::Other);
3989   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3990                            KmpTaskTWithPrivatesPtrQTy,
3991                            ImplicitParamDecl::Other);
3992   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3993                                 ImplicitParamDecl::Other);
3994   Args.push_back(&DstArg);
3995   Args.push_back(&SrcArg);
3996   Args.push_back(&LastprivArg);
3997   const auto &TaskDupFnInfo =
3998       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3999   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4000   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4001   auto *TaskDup = llvm::Function::Create(
4002       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4003   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4004   TaskDup->setDoesNotRecurse();
4005   CodeGenFunction CGF(CGM);
4006   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4007                     Loc);
4008 
4009   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4010       CGF.GetAddrOfLocalVar(&DstArg),
4011       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4012   // task_dst->liter = lastpriv;
4013   if (WithLastIter) {
4014     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4015     LValue Base = CGF.EmitLValueForField(
4016         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4017     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4018     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4019         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4020     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4021   }
4022 
4023   // Emit initial values for private copies (if any).
4024   assert(!Privates.empty());
4025   Address KmpTaskSharedsPtr = Address::invalid();
4026   if (!Data.FirstprivateVars.empty()) {
4027     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4028         CGF.GetAddrOfLocalVar(&SrcArg),
4029         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4030     LValue Base = CGF.EmitLValueForField(
4031         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4032     KmpTaskSharedsPtr = Address(
4033         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4034                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4035                                                   KmpTaskTShareds)),
4036                              Loc),
4037         CGM.getNaturalTypeAlignment(SharedsTy));
4038   }
4039   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4040                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4041   CGF.FinishFunction();
4042   return TaskDup;
4043 }
4044 
4045 /// Checks if destructor function is required to be generated.
4046 /// \return true if cleanups are required, false otherwise.
4047 static bool
4048 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4049                          ArrayRef<PrivateDataTy> Privates) {
4050   for (const PrivateDataTy &P : Privates) {
4051     if (P.second.isLocalPrivate())
4052       continue;
4053     QualType Ty = P.second.Original->getType().getNonReferenceType();
4054     if (Ty.isDestructedType())
4055       return true;
4056   }
4057   return false;
4058 }
4059 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes every iterator variable and its
/// associated counter, then emits the loop headers (counter init, bounds
/// check, body entry) for each iterator in declaration order.  The
/// destructor emits, in reverse order, the counter increment, the back-edge
/// to the loop header and the loop exit block, so any code emitted between
/// construction and destruction ends up inside the nested iterator loops.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the loops; nullptr makes this scope a no-op.
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop header) and exit destinations, indexed by
  // iterator number; filled by the constructor, consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Upper bounds are emitted up front, before the loop nest, so their
    // side effects happen exactly once.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      // Private storage for the iterator variable itself.
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      // Private storage for the helper counter that drives the loop.
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned comparison matching the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4138 
4139 static std::pair<llvm::Value *, llvm::Value *>
4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4141   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4142   llvm::Value *Addr;
4143   if (OASE) {
4144     const Expr *Base = OASE->getBase();
4145     Addr = CGF.EmitScalarExpr(Base);
4146   } else {
4147     Addr = CGF.EmitLValue(E).getPointer(CGF);
4148   }
4149   llvm::Value *SizeVal;
4150   QualType Ty = E->getType();
4151   if (OASE) {
4152     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4153     for (const Expr *SE : OASE->getDimensions()) {
4154       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4155       Sz = CGF.EmitScalarConversion(
4156           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4157       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4158     }
4159   } else if (const auto *ASE =
4160                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4161     LValue UpAddrLVal =
4162         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4163     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4164     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4165         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4166     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4167     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4168     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4169   } else {
4170     SizeVal = CGF.getTypeSize(Ty);
4171   }
4172   return std::make_pair(Addr, SizeVal);
4173 }
4174 
4175 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4176 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4177   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4178   if (KmpTaskAffinityInfoTy.isNull()) {
4179     RecordDecl *KmpAffinityInfoRD =
4180         C.buildImplicitRecord("kmp_task_affinity_info_t");
4181     KmpAffinityInfoRD->startDefinition();
4182     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4183     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4184     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4185     KmpAffinityInfoRD->completeDefinition();
4186     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4187   }
4188 }
4189 
/// Emits allocation and initialization of a kmp_task_t descriptor for a
/// task-generating directive (task, taskloop and target-based directives):
/// collects and sorts all privates, builds (or reuses) the kmp_task_t record
/// type plus the task-specific privates wrapper, emits the proxy entry point
/// and helper functions, allocates the task via the runtime and fills in
/// shareds, detach/affinity data, destructors and priority.
/// \param TaskFunction Outlined function executing the task body.
/// \param SharedsTy Record type capturing the shared variables.
/// \param Shareds Address of the captured shareds in the generating function.
/// \param Data Clause-derived task properties (privates, final, priority...).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // private variables (no initializer, no lastprivate handling).
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // firstprivate variables carry an element-initializer declaration as well.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // lastprivate variables.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Task-local variables; allocatable ones are stored as a pointer.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment; the sort is stable so equally-aligned
  // privates keep their original relative order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separate cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-mapping function is passed as the 4th parameter of the
  // outlined task function; take the expected type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime expression (select between FinalFlag
  // and 0) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // For 'nowait' the task is allocated through
    // __kmpc_omp_target_task_alloc, which takes an additional 64-bit device
    // ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count
    // (product of all iterator upper bounds); plain clauses contribute a
    // compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized: emit a VLA of kmp_task_affinity_info_t.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time sized: a plain constant array temporary suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, starting
    // right after the statically-filled elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-duplication helper when there are
    // lastprivates or privates requiring (re)initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4580 
namespace {
/// Dependence kind for RTL.
/// These values are what gets stored into the 'flags' field of a
/// kmp_depend_info record (see emitDependData/translateDependencyKind).
enum RTLDependenceKindTy {
  DepIn = 0x01,              // 'in' dependence.
  DepInOut = 0x3,            // 'out' and 'inout' dependences share this code.
  DepMutexInOutSet = 0x4     // 'mutexinoutset' dependence.
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4591 
4592 /// Translates internal dependency kind into the runtime kind.
4593 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4594   RTLDependenceKindTy DepKind;
4595   switch (K) {
4596   case OMPC_DEPEND_in:
4597     DepKind = DepIn;
4598     break;
4599   // Out and InOut dependencies must use the same code.
4600   case OMPC_DEPEND_out:
4601   case OMPC_DEPEND_inout:
4602     DepKind = DepInOut;
4603     break;
4604   case OMPC_DEPEND_mutexinoutset:
4605     DepKind = DepMutexInOutSet;
4606     break;
4607   case OMPC_DEPEND_source:
4608   case OMPC_DEPEND_sink:
4609   case OMPC_DEPEND_depobj:
4610   case OMPC_DEPEND_unknown:
4611     llvm_unreachable("Unknown task dependence type");
4612   }
4613   return DepKind;
4614 }
4615 
4616 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4617 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4618                            QualType &FlagsTy) {
4619   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4620   if (KmpDependInfoTy.isNull()) {
4621     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4622     KmpDependInfoRD->startDefinition();
4623     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4624     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4625     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4626     KmpDependInfoRD->completeDefinition();
4627     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4628   }
4629 }
4630 
/// Returns the number of dependence elements stored in a depobj together
/// with an lvalue for the first kmp_depend_info element of its array.
/// \p DepobjLVal holds a void* that points at the dependency array; the
/// element count is read from the base_addr field of the array element
/// located immediately *before* the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the stored void* and re-type it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step one element back to reach the header element holding the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4659 
/// Emits one kmp_depend_info record into \p DependenciesArray for every
/// dependence expression in \p Data, starting at position \p Pos.
/// \param Pos Either a compile-time counter (unsigned*) that is incremented
///        in place, or a runtime counter lvalue whose stored value is
///        loaded, used as an index and incremented in the generated IR
///        (needed when the count is only known at run time, e.g. with an
///        iterator modifier).
/// \param Data Dependence kind, expressions and optional iterator modifier;
///        with an iterator, all stores happen inside the generated
///        iterator loop nest (see OMPIteratorGeneratorScope).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Opens the iterator loop nest (no-op when there is no iterator modifier).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the counter and GEP with it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: in place for compile-time counters, via an
    // emitted load/add/store for runtime counters.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4719 
/// For each depobj expression in \p Data, compute the number of
/// kmp_depend_info elements stored in that depobj.
/// The count is kept by the runtime in the element placed immediately
/// before the depobj's dependency array (i.e. deps[-1].base_addr). When
/// the clause uses iterators, each per-expression counter temp (zeroed at
/// alloca time) accumulates the counts across all iterator iterations; the
/// totals are loaded only after the iterator loop nest has finished.
/// \returns One llvm::Value per dependency expression with its total
/// element count.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Emit the iterator loop nest (if any); the accumulation below runs
    // inside it.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj handle and reinterpret it as kmp_depend_info *.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives one element before the array.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temp; with iterators this adds
      // the count once per iteration.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // After the iterator loops complete, read back the accumulated totals.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4777 
/// Copy the kmp_depend_info elements stored in each depobj of \p Data into
/// \p DependenciesArray via memcpy.
/// The destination position is the runtime counter \p PosLVal, which is
/// advanced by the number of elements copied from each depobj. The element
/// count of a depobj is read from deps[-1].base_addr, where the runtime
/// keeps it (see emitDepobjDependClause).
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info element, for the memcpy byte count.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the iterator loop nest (if any); the copies below run inside it.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and reinterpret it as kmp_depend_info *.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: Size bytes = NumDeps * sizeof(element).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Advance the destination position by the number of copied elements.
      // pos += NumDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4839 
/// Emit the kmp_depend_info array for a task's depend clauses.
/// Chooses between a fixed-size local array (when every dependency count is
/// a compile-time constant) and a VLA (when depobj dependencies or
/// iterator-driven dependencies make the count runtime-only), then fills it
/// in three passes: plain dependencies, iterator-driven dependencies, and
/// finally the contents of depobj objects.
/// \returns {number of elements (i32), array address cast to void*}, or
/// {nullptr, invalid} when there are no dependencies at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only plain (non-depobj, non-iterator) dependencies.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators; both are runtime values.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Total elements = product of all iterator upper bounds.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized case: total = constant part + depobj part + iterator
    // part, emitted as a variable-length array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // VLA size expression.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully constant case: a plain local array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies, tracked with a compile-time index.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 2: copy regular dependencies with iterators, switching to a
  // runtime counter seeded with the compile-time position.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 3: copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4960 
/// Emit the storage and contents of a depobj dependency object.
/// Allocates (via __kmpc_alloc) an array of N+1 kmp_depend_info elements:
/// element 0 holds the number of real elements in its base_addr field (so
/// later depobj update/destroy/translation code can find the count at
/// deps[-1]), and elements 1..N hold the dependency records. When the
/// clause uses iterators, N is computed at runtime as the product of the
/// iterator upper bounds.
/// \returns The address of element 1 (the first real element), cast to
/// void*, or an invalid address when the clause has no dependencies.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime element count: product of all iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size in bytes = (count + 1) * aligned element size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant element count: size the array type directly.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the real elements starting at slot 1, using a runtime counter when
  // iterators are involved and a compile-time index otherwise.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address past the hidden count element, as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5043 
/// Emit code for the destroy clause of a depobj directive: free the storage
/// allocated by emitDepobjDependClause.
/// The depobj handle points at element 1 of the allocation, so the real
/// allocation base is one kmp_depend_info element before it; that base is
/// what gets passed to __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the depobj handle and view it as kmp_depend_info *.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one element to recover the original allocation base (the
  // hidden count element precedes the handle).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5070 
/// Emit code for the update clause of a depobj directive: rewrite the flags
/// field of every kmp_depend_info element in the depobj to \p NewDepKind.
/// Builds a hand-rolled do-while loop (body block with a PHI over the
/// current element pointer, exiting once the pointer reaches
/// begin + NumDeps).
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Get the element count and the base of the depobj's dependency array.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer: Begin on entry, advanced value
  // on the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5117 
/// Emit code for an OpenMP task directive.
/// Initializes the task object via emitTaskInit, emits the dependency array
/// (if any), and then issues the runtime calls: on the "then" path the task
/// is queued with __kmpc_omp_task_with_deps / __kmpc_omp_task; on the
/// "else" path (if(false) clause) dependencies are waited on with
/// __kmpc_omp_wait_deps and the task body is executed inline between
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
/// \param IfCond The condition of an if clause, or nullptr when absent (the
/// "then" path is emitted unconditionally in that case).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "then" path: enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "else" path (if clause evaluates to false): execute the task inline.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No if clause: always take the "then" (enqueue) path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5235 
/// Emit code for an OpenMP taskloop directive.
/// Initializes the task object via emitTaskInit, stores the loop bounds,
/// stride and reductions pointer into the kmp_task_t, then issues the
/// __kmpc_taskloop runtime call with the if-clause value, schedule kind
/// (nogroup/grainsize/num_tasks) and the task duplication function.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    // Evaluate the if clause to an int (0/1) for the runtime.
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task from the loop's LB.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field from the loop's UB.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field from the loop's stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule encoding expected by __kmpc_taskloop's sched parameter.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5321 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional 'x' part of an atomic update expression, forwarded
/// unchanged to \p RedOpGen.
/// \param EExpr Optional 'expr' part of an atomic update expression,
/// forwarded unchanged to \p RedOpGen.
/// \param UpExpr Optional full update expression, forwarded unchanged to
/// \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length section.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element; the back-edge
  // incomings are added after the body is emitted (see below).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated combiner operates on one element per iteration.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Use the *current* insert block for the back-edge incomings: the combiner
  // may have emitted additional basic blocks inside the loop body.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5404 
5405 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5406 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5407 /// UDR combiner function.
5408 static void emitReductionCombiner(CodeGenFunction &CGF,
5409                                   const Expr *ReductionOp) {
5410   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5411     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5412       if (const auto *DRE =
5413               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5414         if (const auto *DRD =
5415                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5416           std::pair<llvm::Function *, llvm::Function *> Reduction =
5417               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5418           RValue Func = RValue::get(Reduction.first);
5419           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5420           CGF.EmitIgnoredExpr(ReductionOp);
5421           return;
5422         }
5423   CGF.EmitIgnoredExpr(ReductionOp);
5424 }
5425 
/// Emits the outlined reduction function
///   void reduction_func(void *LHSArg, void *RHSArg)
/// that combines, item by item, the array of pointers passed as RHSArg into
/// the array of pointers passed as LHSArg. Used as the reduce_func argument
/// of the __kmpc_reduce runtime calls.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the incoming
  // pointer arrays. Idx tracks the slot index and may advance faster than I
  // because VLA items occupy an extra slot holding their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stored as an extra pointer-sized slot right after the
      // item; bind it to the VLA size expression via an opaque-value mapping.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5517 
5518 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5519                                                   const Expr *ReductionOp,
5520                                                   const Expr *PrivateRef,
5521                                                   const DeclRefExpr *LHS,
5522                                                   const DeclRefExpr *RHS) {
5523   if (PrivateRef->getType()->isArrayType()) {
5524     // Emit reduction for array section.
5525     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5526     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5527     EmitOMPAggregateReduction(
5528         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5529         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5530           emitReductionCombiner(CGF, ReductionOp);
5531         });
5532   } else {
5533     // Emit reduction for array subscript or single variable.
5534     emitReductionCombiner(CGF, ReductionOp);
5535   }
5536 }
5537 
/// Emits the code that finalizes a 'reduction' clause: either a plain serial
/// combination (SimpleReduction), or the full __kmpc_reduce{_nowait} protocol
/// with a switch over the runtime's answer — case 1 combines directly and
/// calls __kmpc_end_reduce{_nowait}, case 2 combines atomically.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial case: no runtime calls, just combine each item in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The VLA element count is smuggled through the list as a fake pointer
      // in the slot following the item; reduction_func reads it back.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update>' so the update can be attempted as a simple
      // atomic read-modify-write.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Re-evaluate the update with VD bound to a temporary that
                // holds the value read by the atomic operation.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  // Both cases fall through to the default (continuation) block.
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5844 
5845 /// Generates unique name for artificial threadprivate variables.
5846 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5847 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5848                                       const Expr *Ref) {
5849   SmallString<256> Buffer;
5850   llvm::raw_svector_ostream Out(Buffer);
5851   const clang::DeclRefExpr *DE;
5852   const VarDecl *D = ::getBaseDecl(Ref, DE);
5853   if (!D)
5854     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5855   D = D->getCanonicalDecl();
5856   std::string Name = CGM.getOpenMPRuntime().getName(
5857       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5858   Out << Prefix << Name << "_"
5859       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5860   return std::string(Out.str());
5861 }
5862 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict' — they never alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Initializer does not need the original item — pass a null lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5931 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6009 
6010 /// Emits reduction finalizer function:
6011 /// \code
6012 /// void @.red_fini(void* %arg) {
6013 /// %0 = bitcast void* %arg to <type>*
6014 /// <destroy>(<type>* %0)
6015 /// ret void
6016 /// }
6017 /// \endcode
6018 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6019                                            SourceLocation Loc,
6020                                            ReductionCodeGen &RCG, unsigned N) {
6021   if (!RCG.needCleanups(N))
6022     return nullptr;
6023   ASTContext &C = CGM.getContext();
6024   FunctionArgList Args;
6025   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6026                           ImplicitParamDecl::Other);
6027   Args.emplace_back(&Param);
6028   const auto &FnInfo =
6029       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6030   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6031   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6032   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6033                                     Name, &CGM.getModule());
6034   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6035   Fn->setDoesNotRecurse();
6036   CodeGenFunction CGF(CGM);
6037   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6038   Address PrivateAddr = CGF.EmitLoadOfPointer(
6039       CGF.GetAddrOfLocalVar(&Param),
6040       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6041   llvm::Value *Size = nullptr;
6042   // If the size of the reduction item is non-constant, load it from global
6043   // threadprivate variable.
6044   if (RCG.getSizes(N).second) {
6045     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6046         CGF, CGM.getContext().getSizeType(),
6047         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6048     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6049                                 CGM.getContext().getSizeType(), Loc);
6050   }
6051   RCG.emitAggregateType(CGF, N, Size);
6052   // Emit the finalizer body:
6053   // <destroy>(<type>* %0)
6054   RCG.emitCleanups(CGF, N, PrivateAddr);
6055   CGF.FinishFunction(Loc);
6056   return Fn;
6057 }
6058 
/// Emits the runtime initializer for task reductions: builds a stack array of
/// kmp_taskred_input_t descriptors (one per reduction item) and passes it to
/// __kmpc_taskred_init or, for reductions with a task modifier, to
/// __kmpc_taskred_modifier_init. Returns the taskgroup data pointer produced
/// by the runtime, or nullptr if there is nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer is optional; store a null pointer when the item needs no
    // cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 marks this item for delayed creation (see the comment on
    // DelayedCreation above); otherwise the flags field is zero-initialized.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6187 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  // (The previous comment here described __kmpc_taskred_modifier_init, which
  // was a copy-paste error; this function emits the matching finalizer call.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6205 
6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207                                               SourceLocation Loc,
6208                                               ReductionCodeGen &RCG,
6209                                               unsigned N) {
6210   auto Sizes = RCG.getSizes(N);
6211   // Emit threadprivate global variable if the type is non-constant
6212   // (Sizes.second = nullptr).
6213   if (Sizes.second) {
6214     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215                                                      /*isSigned=*/false);
6216     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217         CGF, CGM.getContext().getSizeType(),
6218         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220   }
6221 }
6222 
6223 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6224                                               SourceLocation Loc,
6225                                               llvm::Value *ReductionsPtr,
6226                                               LValue SharedLVal) {
6227   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6228   // *d);
6229   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6230                                                    CGM.IntTy,
6231                                                    /*isSigned=*/true),
6232                          ReductionsPtr,
6233                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6234                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6235   return Address(
6236       CGF.EmitRuntimeCall(
6237           OMPBuilder.getOrCreateRuntimeFunction(
6238               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6239           Args),
6240       SharedLVal.getAlignment());
6241 }
6242 
6243 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6244                                        SourceLocation Loc) {
6245   if (!CGF.HaveInsertPoint())
6246     return;
6247 
6248   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6249     OMPBuilder.createTaskwait(CGF.Builder);
6250   } else {
6251     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6252     // global_tid);
6253     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6254     // Ignore return result until untied tasks are supported.
6255     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6256                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6257                         Args);
6258   }
6259 
6260   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6261     Region->emitUntiedSwitch(CGF);
6262 }
6263 
6264 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6265                                            OpenMPDirectiveKind InnerKind,
6266                                            const RegionCodeGenTy &CodeGen,
6267                                            bool HasCancel) {
6268   if (!CGF.HaveInsertPoint())
6269     return;
6270   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6271                                  InnerKind != OMPD_critical &&
6272                                      InnerKind != OMPD_master &&
6273                                      InnerKind != OMPD_masked);
6274   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6275 }
6276 
namespace {
/// Cancellation kinds emitted as the kmp_int32 cncl_kind argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls (see
/// getCancellationKind below); the numeric values are part of the runtime ABI.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6286 
6287 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6288   RTCancelKind CancelKind = CancelNoreq;
6289   if (CancelRegion == OMPD_parallel)
6290     CancelKind = CancelParallel;
6291   else if (CancelRegion == OMPD_for)
6292     CancelKind = CancelLoop;
6293   else if (CancelRegion == OMPD_sections)
6294     CancelKind = CancelSections;
6295   else {
6296     assert(CancelRegion == OMPD_taskgroup);
6297     CancelKind = CancelTaskgroup;
6298   }
6299   return CancelKind;
6300 }
6301 
/// Emits the runtime call and control flow for a '#pragma omp cancellation
/// point' construct: queries the runtime and, if cancellation was requested,
/// branches out of the enclosing construct through the cleanup stack.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      // EmitBranchThroughCleanup runs any pending cleanups on the way out.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6341 
/// Emits the runtime call and control flow for a '#pragma omp cancel'
/// construct. When an if clause is present, the cancel request is issued only
/// on the true branch; otherwise it is emitted unconditionally.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel request plus the exit-from-construct
    // control flow; it may run inside a different CodeGenFunction when nested
    // under an if clause, hence the explicit captures.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the if-clause condition (empty else branch).
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6387 
6388 namespace {
6389 /// Cleanup action for uses_allocators support.
6390 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6391   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6392 
6393 public:
6394   OMPUsesAllocatorsActionTy(
6395       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6396       : Allocators(Allocators) {}
6397   void Enter(CodeGenFunction &CGF) override {
6398     if (!CGF.HaveInsertPoint())
6399       return;
6400     for (const auto &AllocatorData : Allocators) {
6401       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6402           CGF, AllocatorData.first, AllocatorData.second);
6403     }
6404   }
6405   void Exit(CodeGenFunction &CGF) override {
6406     if (!CGF.HaveInsertPoint())
6407       return;
6408     for (const auto &AllocatorData : Allocators) {
6409       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6410                                                         AllocatorData.first);
6411     }
6412   }
6413 };
6414 } // namespace
6415 
6416 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6417     const OMPExecutableDirective &D, StringRef ParentName,
6418     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6419     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6420   assert(!ParentName.empty() && "Invalid target region parent name!");
6421   HasEmittedTargetRegion = true;
6422   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6423   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6424     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6425       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6426       if (!D.AllocatorTraits)
6427         continue;
6428       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6429     }
6430   }
6431   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6432   CodeGen.setAction(UsesAllocatorAction);
6433   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6434                                    IsOffloadEntry, CodeGen);
6435 }
6436 
/// Emits initialization of a single uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the returned
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Re-wrap the traits lvalue as a void* so it can be loaded as an opaque
  // pointer argument, preserving the original alignment/TBAA info.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself first, then convert the returned
  // void* handle to the variable's declared type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6471 
6472 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6473                                              const Expr *Allocator) {
6474   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6475   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6476   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6477   llvm::Value *AllocatorVal =
6478       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6479   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6480                                           CGF.getContext().VoidPtrTy,
6481                                           Allocator->getExprLoc());
6482   (void)CGF.EmitRuntimeCall(
6483       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6484                                             OMPRTL___kmpc_destroy_allocator),
6485       {ThreadId, AllocatorVal});
6486 }
6487 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement under a fresh CodeGenFunction so the
  // target-region CGCapturedStmtInfo is active only for this emission.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the region ID is an otherwise-unused constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  // when the directive pins them to a positive compile-time constant.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6570 
6571 /// Checks if the expression is constant or does not have non-trivial function
6572 /// calls.
6573 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6574   // We can skip constant expressions.
6575   // We can skip expressions with trivial calls or simple expressions.
6576   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6577           !E->hasNonTrivialCall(Ctx)) &&
6578          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6579 }
6580 
/// Drills through compound statements (and trivial/ignorable statements
/// inside them) looking for the single "real" child statement of \p Body.
/// Returns nullptr when more than one non-ignorable child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep unwrapping as long as the current candidate is itself a compound
  // statement with exactly one non-ignorable child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Constant / side-effect-free expressions do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declaration in it is: type
        // declarations, pragmas, usings, OpenMP declarative directives, and
        // globals or unused variables.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6622 
6623 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6624     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6625     int32_t &DefaultVal) {
6626 
6627   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6628   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6629          "Expected target-based executable directive.");
6630   switch (DirectiveKind) {
6631   case OMPD_target: {
6632     const auto *CS = D.getInnermostCapturedStmt();
6633     const auto *Body =
6634         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6635     const Stmt *ChildStmt =
6636         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6637     if (const auto *NestedDir =
6638             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6639       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6640         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6641           const Expr *NumTeams =
6642               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6643           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6644             if (auto Constant =
6645                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6646               DefaultVal = Constant->getExtValue();
6647           return NumTeams;
6648         }
6649         DefaultVal = 0;
6650         return nullptr;
6651       }
6652       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6653           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6654         DefaultVal = 1;
6655         return nullptr;
6656       }
6657       DefaultVal = 1;
6658       return nullptr;
6659     }
6660     // A value of -1 is used to check if we need to emit no teams region
6661     DefaultVal = -1;
6662     return nullptr;
6663   }
6664   case OMPD_target_teams:
6665   case OMPD_target_teams_distribute:
6666   case OMPD_target_teams_distribute_simd:
6667   case OMPD_target_teams_distribute_parallel_for:
6668   case OMPD_target_teams_distribute_parallel_for_simd: {
6669     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6670       const Expr *NumTeams =
6671           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6672       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6673         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6674           DefaultVal = Constant->getExtValue();
6675       return NumTeams;
6676     }
6677     DefaultVal = 0;
6678     return nullptr;
6679   }
6680   case OMPD_target_parallel:
6681   case OMPD_target_parallel_for:
6682   case OMPD_target_parallel_for_simd:
6683   case OMPD_target_simd:
6684     DefaultVal = 1;
6685     return nullptr;
6686   case OMPD_parallel:
6687   case OMPD_for:
6688   case OMPD_parallel_for:
6689   case OMPD_parallel_master:
6690   case OMPD_parallel_sections:
6691   case OMPD_for_simd:
6692   case OMPD_parallel_for_simd:
6693   case OMPD_cancel:
6694   case OMPD_cancellation_point:
6695   case OMPD_ordered:
6696   case OMPD_threadprivate:
6697   case OMPD_allocate:
6698   case OMPD_task:
6699   case OMPD_simd:
6700   case OMPD_tile:
6701   case OMPD_unroll:
6702   case OMPD_sections:
6703   case OMPD_section:
6704   case OMPD_single:
6705   case OMPD_master:
6706   case OMPD_critical:
6707   case OMPD_taskyield:
6708   case OMPD_barrier:
6709   case OMPD_taskwait:
6710   case OMPD_taskgroup:
6711   case OMPD_atomic:
6712   case OMPD_flush:
6713   case OMPD_depobj:
6714   case OMPD_scan:
6715   case OMPD_teams:
6716   case OMPD_target_data:
6717   case OMPD_target_exit_data:
6718   case OMPD_target_enter_data:
6719   case OMPD_distribute:
6720   case OMPD_distribute_simd:
6721   case OMPD_distribute_parallel_for:
6722   case OMPD_distribute_parallel_for_simd:
6723   case OMPD_teams_distribute:
6724   case OMPD_teams_distribute_simd:
6725   case OMPD_teams_distribute_parallel_for:
6726   case OMPD_teams_distribute_parallel_for_simd:
6727   case OMPD_target_update:
6728   case OMPD_declare_simd:
6729   case OMPD_declare_variant:
6730   case OMPD_begin_declare_variant:
6731   case OMPD_end_declare_variant:
6732   case OMPD_declare_target:
6733   case OMPD_end_declare_target:
6734   case OMPD_declare_reduction:
6735   case OMPD_declare_mapper:
6736   case OMPD_taskloop:
6737   case OMPD_taskloop_simd:
6738   case OMPD_master_taskloop:
6739   case OMPD_master_taskloop_simd:
6740   case OMPD_parallel_master_taskloop:
6741   case OMPD_parallel_master_taskloop_simd:
6742   case OMPD_requires:
6743   case OMPD_unknown:
6744     break;
6745   default:
6746     break;
6747   }
6748   llvm_unreachable("Unexpected directive kind.");
6749 }
6750 
6751 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6752     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6753   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6754          "Clauses associated with the teams directive expected to be emitted "
6755          "only for the host!");
6756   CGBuilderTy &Bld = CGF.Builder;
6757   int32_t DefaultNT = -1;
6758   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6759   if (NumTeams != nullptr) {
6760     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6761 
6762     switch (DirectiveKind) {
6763     case OMPD_target: {
6764       const auto *CS = D.getInnermostCapturedStmt();
6765       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6766       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6768                                                   /*IgnoreResultAssign*/ true);
6769       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6770                              /*isSigned=*/true);
6771     }
6772     case OMPD_target_teams:
6773     case OMPD_target_teams_distribute:
6774     case OMPD_target_teams_distribute_simd:
6775     case OMPD_target_teams_distribute_parallel_for:
6776     case OMPD_target_teams_distribute_parallel_for_simd: {
6777       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6778       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6779                                                   /*IgnoreResultAssign*/ true);
6780       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6781                              /*isSigned=*/true);
6782     }
6783     default:
6784       break;
6785     }
6786   } else if (DefaultNT == -1) {
6787     return nullptr;
6788   }
6789 
6790   return Bld.getInt32(DefaultNT);
6791 }
6792 
6793 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6794                                   llvm::Value *DefaultThreadLimitVal) {
6795   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6796       CGF.getContext(), CS->getCapturedStmt());
6797   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6798     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6799       llvm::Value *NumThreads = nullptr;
6800       llvm::Value *CondVal = nullptr;
6801       // Handle if clause. If if clause present, the number of threads is
6802       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6803       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6804         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6805         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6806         const OMPIfClause *IfClause = nullptr;
6807         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6808           if (C->getNameModifier() == OMPD_unknown ||
6809               C->getNameModifier() == OMPD_parallel) {
6810             IfClause = C;
6811             break;
6812           }
6813         }
6814         if (IfClause) {
6815           const Expr *Cond = IfClause->getCondition();
6816           bool Result;
6817           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6818             if (!Result)
6819               return CGF.Builder.getInt32(1);
6820           } else {
6821             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6822             if (const auto *PreInit =
6823                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6824               for (const auto *I : PreInit->decls()) {
6825                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6826                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6827                 } else {
6828                   CodeGenFunction::AutoVarEmission Emission =
6829                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6830                   CGF.EmitAutoVarCleanups(Emission);
6831                 }
6832               }
6833             }
6834             CondVal = CGF.EvaluateExprAsBool(Cond);
6835           }
6836         }
6837       }
6838       // Check the value of num_threads clause iff if clause was not specified
6839       // or is not evaluated to false.
6840       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6841         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6842         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6843         const auto *NumThreadsClause =
6844             Dir->getSingleClause<OMPNumThreadsClause>();
6845         CodeGenFunction::LexicalScope Scope(
6846             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6847         if (const auto *PreInit =
6848                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6849           for (const auto *I : PreInit->decls()) {
6850             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6851               CGF.EmitVarDecl(cast<VarDecl>(*I));
6852             } else {
6853               CodeGenFunction::AutoVarEmission Emission =
6854                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6855               CGF.EmitAutoVarCleanups(Emission);
6856             }
6857           }
6858         }
6859         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6860         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6861                                                /*isSigned=*/false);
6862         if (DefaultThreadLimitVal)
6863           NumThreads = CGF.Builder.CreateSelect(
6864               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6865               DefaultThreadLimitVal, NumThreads);
6866       } else {
6867         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6868                                            : CGF.Builder.getInt32(0);
6869       }
6870       // Process condition of the if clause.
6871       if (CondVal) {
6872         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6873                                               CGF.Builder.getInt32(1));
6874       }
6875       return NumThreads;
6876     }
6877     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6878       return CGF.Builder.getInt32(1);
6879     return DefaultThreadLimitVal;
6880   }
6881   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6882                                : CGF.Builder.getInt32(0);
6883 }
6884 
6885 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6886     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6887     int32_t &DefaultVal) {
6888   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6889   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6890          "Expected target-based executable directive.");
6891 
6892   switch (DirectiveKind) {
6893   case OMPD_target:
6894     // Teams have no clause thread_limit
6895     return nullptr;
6896   case OMPD_target_teams:
6897   case OMPD_target_teams_distribute:
6898     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6899       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6900       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6901       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6902         if (auto Constant =
6903                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6904           DefaultVal = Constant->getExtValue();
6905       return ThreadLimit;
6906     }
6907     return nullptr;
6908   case OMPD_target_parallel:
6909   case OMPD_target_parallel_for:
6910   case OMPD_target_parallel_for_simd:
6911   case OMPD_target_teams_distribute_parallel_for:
6912   case OMPD_target_teams_distribute_parallel_for_simd: {
6913     Expr *ThreadLimit = nullptr;
6914     Expr *NumThreads = nullptr;
6915     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6916       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6917       ThreadLimit = ThreadLimitClause->getThreadLimit();
6918       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6919         if (auto Constant =
6920                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6921           DefaultVal = Constant->getExtValue();
6922     }
6923     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6924       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6925       NumThreads = NumThreadsClause->getNumThreads();
6926       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6927         if (auto Constant =
6928                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6929           if (Constant->getExtValue() < DefaultVal) {
6930             DefaultVal = Constant->getExtValue();
6931             ThreadLimit = NumThreads;
6932           }
6933         }
6934       }
6935     }
6936     return ThreadLimit;
6937   }
6938   case OMPD_target_teams_distribute_simd:
6939   case OMPD_target_simd:
6940     DefaultVal = 1;
6941     return nullptr;
6942   case OMPD_parallel:
6943   case OMPD_for:
6944   case OMPD_parallel_for:
6945   case OMPD_parallel_master:
6946   case OMPD_parallel_sections:
6947   case OMPD_for_simd:
6948   case OMPD_parallel_for_simd:
6949   case OMPD_cancel:
6950   case OMPD_cancellation_point:
6951   case OMPD_ordered:
6952   case OMPD_threadprivate:
6953   case OMPD_allocate:
6954   case OMPD_task:
6955   case OMPD_simd:
6956   case OMPD_tile:
6957   case OMPD_unroll:
6958   case OMPD_sections:
6959   case OMPD_section:
6960   case OMPD_single:
6961   case OMPD_master:
6962   case OMPD_critical:
6963   case OMPD_taskyield:
6964   case OMPD_barrier:
6965   case OMPD_taskwait:
6966   case OMPD_taskgroup:
6967   case OMPD_atomic:
6968   case OMPD_flush:
6969   case OMPD_depobj:
6970   case OMPD_scan:
6971   case OMPD_teams:
6972   case OMPD_target_data:
6973   case OMPD_target_exit_data:
6974   case OMPD_target_enter_data:
6975   case OMPD_distribute:
6976   case OMPD_distribute_simd:
6977   case OMPD_distribute_parallel_for:
6978   case OMPD_distribute_parallel_for_simd:
6979   case OMPD_teams_distribute:
6980   case OMPD_teams_distribute_simd:
6981   case OMPD_teams_distribute_parallel_for:
6982   case OMPD_teams_distribute_parallel_for_simd:
6983   case OMPD_target_update:
6984   case OMPD_declare_simd:
6985   case OMPD_declare_variant:
6986   case OMPD_begin_declare_variant:
6987   case OMPD_end_declare_variant:
6988   case OMPD_declare_target:
6989   case OMPD_end_declare_target:
6990   case OMPD_declare_reduction:
6991   case OMPD_declare_mapper:
6992   case OMPD_taskloop:
6993   case OMPD_taskloop_simd:
6994   case OMPD_master_taskloop:
6995   case OMPD_master_taskloop_simd:
6996   case OMPD_parallel_master_taskloop:
6997   case OMPD_parallel_master_taskloop_simd:
6998   case OMPD_requires:
6999   case OMPD_unknown:
7000     break;
7001   default:
7002     break;
7003   }
7004   llvm_unreachable("Unsupported directive kind.");
7005 }
7006 
/// Emit, on the host, the llvm::Value holding the number of threads to pass
/// to the offloading runtime for target directive \p D.  Depending on the
/// directive kind this combines thread_limit, num_threads and the
/// parallel-related if clause; a result of 0 apparently means "let the
/// runtime decide", matching the <cond> ? (<numthreads> ? <numthreads> : 0)
/// : 1 formula used below and in getNumThreads().
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target' the relevant clauses live on directives nested in
    // the captured statement; walk inwards looking for them.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested thread_limit clause in the context of the
        // captured statement, emitting its pre-init declarations first.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Captured copy that must not be initialized here: allocate
              // and register cleanups only.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams directive that is not also a distribute, descend
      // one more level to find the parallel/distribute region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region is executed by exactly one thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // num_threads can still come from a parallel/distribute region nested in
    // the teams region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the if clause that applies to 'parallel' (either unmodified or
      // with the 'parallel' directive-name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // A constant-false condition forces a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // The effective bound is min(num_threads, thread_limit) (unsigned).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Fold in the runtime-evaluated if-clause condition, if any.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // A simd region is executed by exactly one thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  // Guarded by the isOpenMPTargetExecutionDirective() assertion above.
  llvm_unreachable("Unsupported directive kind.");
}
7224 
7225 namespace {
7226 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7227 
7228 // Utility to handle information from clauses associated with a given
7229 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7230 // It provides a convenient interface to obtain the information and generate
7231 // code for that information.
7232 class MappableExprsHandler {
7233 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values appear to be part of the ABI shared with the
  /// offloading runtime (the 0x800 slot is explicitly reserved for XLC
  /// compatibility) -- confirm against the libomptarget headers before
  /// adding or renumbering flags.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7281 
7282   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7283   static unsigned getFlagMemberOffset() {
7284     unsigned Offset = 0;
7285     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7286          Remain = Remain >> 1)
7287       Offset++;
7288     return Offset;
7289   }
7290 
7291   /// Class that holds debugging information for a data mapping to be passed to
7292   /// the runtime library.
7293   class MappingExprInfo {
7294     /// The variable declaration used for the data mapping.
7295     const ValueDecl *MapDecl = nullptr;
7296     /// The original expression used in the map clause, or null if there is
7297     /// none.
7298     const Expr *MapExpr = nullptr;
7299 
7300   public:
7301     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7302         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7303 
7304     const ValueDecl *getMapDecl() const { return MapDecl; }
7305     const Expr *getMapExpr() const { return MapExpr; }
7306   };
7307 
7308   /// Class that associates information with a base pointer to be passed to the
7309   /// runtime library.
7310   class BasePointerInfo {
7311     /// The base pointer.
7312     llvm::Value *Ptr = nullptr;
7313     /// The base declaration that refers to this device pointer, or null if
7314     /// there is none.
7315     const ValueDecl *DevPtrDecl = nullptr;
7316 
7317   public:
7318     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7319         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7320     llvm::Value *operator*() const { return Ptr; }
7321     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7322     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7323   };
7324 
  // Convenience aliases for the small-vector types used throughout the
  // mapping code.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7332 
7333   /// This structure contains combined information generated for mappable
7334   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7335   /// mappers, and non-contiguous information.
7336   struct MapCombinedInfoTy {
7337     struct StructNonContiguousInfo {
7338       bool IsNonContiguous = false;
7339       MapDimArrayTy Dims;
7340       MapNonContiguousArrayTy Offsets;
7341       MapNonContiguousArrayTy Counts;
7342       MapNonContiguousArrayTy Strides;
7343     };
7344     MapExprsArrayTy Exprs;
7345     MapBaseValuesArrayTy BasePointers;
7346     MapValuesArrayTy Pointers;
7347     MapValuesArrayTy Sizes;
7348     MapFlagsArrayTy Types;
7349     MapMappersArrayTy Mappers;
7350     StructNonContiguousInfo NonContigInfo;
7351 
7352     /// Append arrays in \a CurInfo.
7353     void append(MapCombinedInfoTy &CurInfo) {
7354       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7355       BasePointers.append(CurInfo.BasePointers.begin(),
7356                           CurInfo.BasePointers.end());
7357       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7358       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7359       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7360       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7361       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7362                                  CurInfo.NonContigInfo.Dims.end());
7363       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7364                                     CurInfo.NonContigInfo.Offsets.end());
7365       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7366                                    CurInfo.NonContigInfo.Counts.end());
7367       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7368                                     CurInfo.NonContigInfo.Strides.end());
7369     }
7370   };
7371 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Mappings gathered while the struct's members are processed
    // individually (presumably emitted alongside the struct's combined
    // entry -- confirm against users of this field).
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped element: (field index, address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element: (field index, address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct.
    Address Base = Address::invalid();
    // Lower-bound address of the mapped range.
    Address LB = Address::invalid();
    // True if an array section of the struct is being mapped.
    bool IsArraySection = false;
    // True if the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7387 
7388 private:
  /// Information gathered from a single map-like clause about one mappable
  /// expression: its components, the map/motion kind and modifiers, and
  /// whether a device pointer has to be returned for it.
  struct MapInfo {
    /// Components of the mappable expression, from the base declaration up to
    /// the complete expression, as produced by sema.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the clause (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close, present).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the device address/pointer of this entry must be returned
    /// (use_device_ptr / use_device_addr).
    bool ReturnDevicePointer = false;
    /// True if this map was generated implicitly rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper attached to this map, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original expression the map refers to, if available.
    const Expr *VarRef = nullptr;
    /// True if this entry comes from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7415 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression referencing the deferred device pointer/address.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr/use_device_addr clause names.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7428 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7449 
  /// Compute the size in bytes of the storage designated by expression \a E,
  /// returned as an llvm::Value of the target's size_t type. Handles array
  /// shaping expressions and OpenMP array sections specially; everything else
  /// falls through to the static size of the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: element size times the
    // product of all the (runtime-evaluated) dimensions.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the pointee (pointer base) or the
      // element type (array base).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero if the lower bound offset reaches past the end of the
      // base, so the subtraction below cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7524 
7525   /// Return the corresponding bits for a given map clause modifier. Add
7526   /// a flag marking the map as a pointer if requested. Add a flag marking the
7527   /// map as the first one of a series of maps that relate to the same map
7528   /// expression.
7529   OpenMPOffloadMappingFlags getMapTypeBits(
7530       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7531       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7532       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7533     OpenMPOffloadMappingFlags Bits =
7534         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7535     switch (MapType) {
7536     case OMPC_MAP_alloc:
7537     case OMPC_MAP_release:
7538       // alloc and release is the default behavior in the runtime library,  i.e.
7539       // if we don't pass any bits alloc/release that is what the runtime is
7540       // going to do. Therefore, we don't need to signal anything for these two
7541       // type modifiers.
7542       break;
7543     case OMPC_MAP_to:
7544       Bits |= OMP_MAP_TO;
7545       break;
7546     case OMPC_MAP_from:
7547       Bits |= OMP_MAP_FROM;
7548       break;
7549     case OMPC_MAP_tofrom:
7550       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7551       break;
7552     case OMPC_MAP_delete:
7553       Bits |= OMP_MAP_DELETE;
7554       break;
7555     case OMPC_MAP_unknown:
7556       llvm_unreachable("Unexpected map type!");
7557     }
7558     if (AddPtrFlag)
7559       Bits |= OMP_MAP_PTR_AND_OBJ;
7560     if (AddIsTargetParamFlag)
7561       Bits |= OMP_MAP_TARGET_PARAM;
7562     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7563         != MapModifiers.end())
7564       Bits |= OMP_MAP_ALWAYS;
7565     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7566         != MapModifiers.end())
7567       Bits |= OMP_MAP_CLOSE;
7568     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7569             MapModifiers.end() ||
7570         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7571             MotionModifiers.end())
7572       Bits |= OMP_MAP_PRESENT;
7573     if (IsNonContiguous)
7574       Bits |= OMP_MAP_NON_CONTIG;
7575     return Bits;
7576   }
7577 
7578   /// Return true if the provided expression is a final array section. A
7579   /// final array section, is one whose length can't be proved to be one.
7580   bool isFinalArraySectionExpression(const Expr *E) const {
7581     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7582 
7583     // It is not an array section and therefore not a unity-size one.
7584     if (!OASE)
7585       return false;
7586 
7587     // An array section with no colon always refer to a single element.
7588     if (OASE->getColonLocFirst().isInvalid())
7589       return false;
7590 
7591     const Expr *Length = OASE->getLength();
7592 
7593     // If we don't have a length we have to check if the array has size 1
7594     // for this dimension. Also, we should always expect a length if the
7595     // base type is pointer.
7596     if (!Length) {
7597       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7598                              OASE->getBase()->IgnoreParenImpCasts())
7599                              .getCanonicalType();
7600       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7601         return ATy->getSize().getSExtValue() != 1;
7602       // If we don't have a constant dimension length, we have to consider
7603       // the current section as having any size, so it is not necessarily
7604       // unitary. If it happen to be unity size, that's user fault.
7605       return true;
7606     }
7607 
7608     // Check if the length evaluates to 1.
7609     Expr::EvalResult Result;
7610     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7611       return true; // Can have more that size 1.
7612 
7613     llvm::APSInt ConstLength = Result.Val.getInt();
7614     return ConstLength.getSExtValue() != 1;
7615   }
7616 
7617   /// Generate the base pointers, section pointers, sizes, map type bits, and
7618   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7619   /// map type, map or motion modifiers, and expression components.
7620   /// \a IsFirstComponent should be set to true if the provided set of
7621   /// components is the first associated with a capture.
7622   void generateInfoForComponentList(
7623       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7624       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7625       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7626       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7627       bool IsFirstComponentList, bool IsImplicit,
7628       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7629       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7630       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7631           OverlappedElements = llvm::None) const {
7632     // The following summarizes what has to be generated for each map and the
7633     // types below. The generated information is expressed in this order:
7634     // base pointer, section pointer, size, flags
7635     // (to add to the ones that come from the map type and modifier).
7636     //
7637     // double d;
7638     // int i[100];
7639     // float *p;
7640     //
7641     // struct S1 {
7642     //   int i;
7643     //   float f[50];
7644     // }
7645     // struct S2 {
7646     //   int i;
7647     //   float f[50];
7648     //   S1 s;
7649     //   double *p;
7650     //   struct S2 *ps;
7651     //   int &ref;
7652     // }
7653     // S2 s;
7654     // S2 *ps;
7655     //
7656     // map(d)
7657     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7658     //
7659     // map(i)
7660     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7661     //
7662     // map(i[1:23])
7663     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7664     //
7665     // map(p)
7666     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7667     //
7668     // map(p[1:24])
7669     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7670     // in unified shared memory mode or for local pointers
7671     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7672     //
7673     // map(s)
7674     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7675     //
7676     // map(s.i)
7677     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7678     //
7679     // map(s.s.f)
7680     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7681     //
7682     // map(s.p)
7683     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7684     //
7685     // map(to: s.p[:22])
7686     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7687     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7688     // &(s.p), &(s.p[0]), 22*sizeof(double),
7689     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7690     // (*) alloc space for struct members, only this is a target parameter
7691     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7692     //      optimizes this entry out, same in the examples below)
7693     // (***) map the pointee (map: to)
7694     //
7695     // map(to: s.ref)
7696     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7697     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7698     // (*) alloc space for struct members, only this is a target parameter
7699     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7700     //      optimizes this entry out, same in the examples below)
7701     // (***) map the pointee (map: to)
7702     //
7703     // map(s.ps)
7704     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7705     //
7706     // map(from: s.ps->s.i)
7707     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7708     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7709     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7710     //
7711     // map(to: s.ps->ps)
7712     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7713     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7714     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7715     //
7716     // map(s.ps->ps->ps)
7717     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7718     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7719     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7720     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7721     //
7722     // map(to: s.ps->ps->s.f[:22])
7723     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7724     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7725     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7726     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7727     //
7728     // map(ps)
7729     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7730     //
7731     // map(ps->i)
7732     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7733     //
7734     // map(ps->s.f)
7735     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7736     //
7737     // map(from: ps->p)
7738     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7739     //
7740     // map(to: ps->p[:22])
7741     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7742     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7743     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7744     //
7745     // map(ps->ps)
7746     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7747     //
7748     // map(from: ps->ps->s.i)
7749     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7750     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7751     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7752     //
7753     // map(from: ps->ps->ps)
7754     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7755     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7756     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7757     //
7758     // map(ps->ps->ps->ps)
7759     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7760     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7761     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7762     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7763     //
7764     // map(to: ps->ps->ps->s.f[:22])
7765     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7766     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7767     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7768     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7769     //
7770     // map(to: s.f[:22]) map(from: s.p[:33])
7771     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7772     //     sizeof(double*) (**), TARGET_PARAM
7773     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7774     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7775     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7776     // (*) allocate contiguous space needed to fit all mapped members even if
7777     //     we allocate space for members not mapped (in this example,
7778     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7779     //     them as well because they fall between &s.f[0] and &s.p)
7780     //
7781     // map(from: s.f[:22]) map(to: ps->p[:33])
7782     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7783     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7784     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7785     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7786     // (*) the struct this entry pertains to is the 2nd element in the list of
7787     //     arguments, hence MEMBER_OF(2)
7788     //
7789     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7790     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7791     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7792     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7793     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7794     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7795     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7796     // (*) the struct this entry pertains to is the 4th element in the list
7797     //     of arguments, hence MEMBER_OF(4)
7798 
7799     // Track if the map information being generated is the first for a capture.
7800     bool IsCaptureFirstInfo = IsFirstComponentList;
7801     // When the variable is on a declare target link or in a to clause with
7802     // unified memory, a reference is needed to hold the host/device address
7803     // of the variable.
7804     bool RequiresReference = false;
7805 
7806     // Scan the components from the base to the complete expression.
7807     auto CI = Components.rbegin();
7808     auto CE = Components.rend();
7809     auto I = CI;
7810 
7811     // Track if the map information being generated is the first for a list of
7812     // components.
7813     bool IsExpressionFirstInfo = true;
7814     bool FirstPointerInComplexData = false;
7815     Address BP = Address::invalid();
7816     const Expr *AssocExpr = I->getAssociatedExpression();
7817     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7818     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7819     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7820 
7821     if (isa<MemberExpr>(AssocExpr)) {
7822       // The base is the 'this' pointer. The content of the pointer is going
7823       // to be the base of the field being mapped.
7824       BP = CGF.LoadCXXThisAddress();
7825     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7826                (OASE &&
7827                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7828       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7829     } else if (OAShE &&
7830                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7831       BP = Address(
7832           CGF.EmitScalarExpr(OAShE->getBase()),
7833           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7834     } else {
7835       // The base is the reference to the variable.
7836       // BP = &Var.
7837       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7838       if (const auto *VD =
7839               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7840         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7841                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7842           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7843               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7844                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7845             RequiresReference = true;
7846             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7847           }
7848         }
7849       }
7850 
7851       // If the variable is a pointer and is being dereferenced (i.e. is not
7852       // the last component), the base has to be the pointer itself, not its
7853       // reference. References are ignored for mapping purposes.
7854       QualType Ty =
7855           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7856       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7857         // No need to generate individual map information for the pointer, it
7858         // can be associated with the combined storage if shared memory mode is
7859         // active or the base declaration is not global variable.
7860         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7861         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7862             !VD || VD->hasLocalStorage())
7863           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7864         else
7865           FirstPointerInComplexData = true;
7866         ++I;
7867       }
7868     }
7869 
7870     // Track whether a component of the list should be marked as MEMBER_OF some
7871     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7872     // in a component list should be marked as MEMBER_OF, all subsequent entries
7873     // do not belong to the base struct. E.g.
7874     // struct S2 s;
7875     // s.ps->ps->ps->f[:]
7876     //   (1) (2) (3) (4)
7877     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7878     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7879     // is the pointee of ps(2) which is not member of struct s, so it should not
7880     // be marked as such (it is still PTR_AND_OBJ).
7881     // The variable is initialized to false so that PTR_AND_OBJ entries which
7882     // are not struct members are not considered (e.g. array of pointers to
7883     // data).
7884     bool ShouldBeMemberOf = false;
7885 
7886     // Variable keeping track of whether or not we have encountered a component
7887     // in the component list which is a member expression. Useful when we have a
7888     // pointer or a final array section, in which case it is the previous
7889     // component in the list which tells us whether we have a member expression.
7890     // E.g. X.f[:]
7891     // While processing the final array section "[:]" it is "f" which tells us
7892     // whether we are dealing with a member of a declared struct.
7893     const MemberExpr *EncounteredME = nullptr;
7894 
7895     // Track for the total number of dimension. Start from one for the dummy
7896     // dimension.
7897     uint64_t DimSize = 1;
7898 
7899     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7900     bool IsPrevMemberReference = false;
7901 
7902     for (; I != CE; ++I) {
7903       // If the current component is member of a struct (parent struct) mark it.
7904       if (!EncounteredME) {
7905         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7906         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7907         // as MEMBER_OF the parent struct.
7908         if (EncounteredME) {
7909           ShouldBeMemberOf = true;
7910           // Do not emit as complex pointer if this is actually not array-like
7911           // expression.
7912           if (FirstPointerInComplexData) {
7913             QualType Ty = std::prev(I)
7914                               ->getAssociatedDeclaration()
7915                               ->getType()
7916                               .getNonReferenceType();
7917             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7918             FirstPointerInComplexData = false;
7919           }
7920         }
7921       }
7922 
7923       auto Next = std::next(I);
7924 
7925       // We need to generate the addresses and sizes if this is the last
7926       // component, if the component is a pointer or if it is an array section
7927       // whose length can't be proved to be one. If this is a pointer, it
7928       // becomes the base address for the following components.
7929 
7930       // A final array section, is one whose length can't be proved to be one.
7931       // If the map item is non-contiguous then we don't treat any array section
7932       // as final array section.
7933       bool IsFinalArraySection =
7934           !IsNonContiguous &&
7935           isFinalArraySectionExpression(I->getAssociatedExpression());
7936 
7937       // If we have a declaration for the mapping use that, otherwise use
7938       // the base declaration of the map clause.
7939       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7940                                      ? I->getAssociatedDeclaration()
7941                                      : BaseDecl;
7942       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7943                                                : MapExpr;
7944 
7945       // Get information on whether the element is a pointer. Have to do a
7946       // special treatment for array sections given that they are built-in
7947       // types.
7948       const auto *OASE =
7949           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7950       const auto *OAShE =
7951           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7952       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7953       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7954       bool IsPointer =
7955           OAShE ||
7956           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7957                        .getCanonicalType()
7958                        ->isAnyPointerType()) ||
7959           I->getAssociatedExpression()->getType()->isAnyPointerType();
7960       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7961                                MapDecl &&
7962                                MapDecl->getType()->isLValueReferenceType();
7963       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7964 
7965       if (OASE)
7966         ++DimSize;
7967 
7968       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7969           IsFinalArraySection) {
7970         // If this is not the last component, we expect the pointer to be
7971         // associated with an array expression or member expression.
7972         assert((Next == CE ||
7973                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7974                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7975                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7976                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7977                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7978                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7979                "Unexpected expression");
7980 
7981         Address LB = Address::invalid();
7982         Address LowestElem = Address::invalid();
7983         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7984                                        const MemberExpr *E) {
7985           const Expr *BaseExpr = E->getBase();
7986           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7987           // scalar.
7988           LValue BaseLV;
7989           if (E->isArrow()) {
7990             LValueBaseInfo BaseInfo;
7991             TBAAAccessInfo TBAAInfo;
7992             Address Addr =
7993                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7994             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7995             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7996           } else {
7997             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7998           }
7999           return BaseLV;
8000         };
8001         if (OAShE) {
8002           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8003                                     CGF.getContext().getTypeAlignInChars(
8004                                         OAShE->getBase()->getType()));
8005         } else if (IsMemberReference) {
8006           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8007           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8008           LowestElem = CGF.EmitLValueForFieldInitialization(
8009                               BaseLVal, cast<FieldDecl>(MapDecl))
8010                            .getAddress(CGF);
8011           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8012                    .getAddress(CGF);
8013         } else {
8014           LowestElem = LB =
8015               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8016                   .getAddress(CGF);
8017         }
8018 
8019         // If this component is a pointer inside the base struct then we don't
8020         // need to create any entry for it - it will be combined with the object
8021         // it is pointing to into a single PTR_AND_OBJ entry.
8022         bool IsMemberPointerOrAddr =
8023             EncounteredME &&
8024             (((IsPointer || ForDeviceAddr) &&
8025               I->getAssociatedExpression() == EncounteredME) ||
8026              (IsPrevMemberReference && !IsPointer) ||
8027              (IsMemberReference && Next != CE &&
8028               !Next->getAssociatedExpression()->getType()->isPointerType()));
8029         if (!OverlappedElements.empty() && Next == CE) {
8030           // Handle base element with the info for overlapped elements.
8031           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8032           assert(!IsPointer &&
8033                  "Unexpected base element with the pointer type.");
8034           // Mark the whole struct as the struct that requires allocation on the
8035           // device.
8036           PartialStruct.LowestElem = {0, LowestElem};
8037           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8038               I->getAssociatedExpression()->getType());
8039           Address HB = CGF.Builder.CreateConstGEP(
8040               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8041                                                               CGF.VoidPtrTy),
8042               TypeSize.getQuantity() - 1);
8043           PartialStruct.HighestElem = {
8044               std::numeric_limits<decltype(
8045                   PartialStruct.HighestElem.first)>::max(),
8046               HB};
8047           PartialStruct.Base = BP;
8048           PartialStruct.LB = LB;
8049           assert(
8050               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8051               "Overlapped elements must be used only once for the variable.");
8052           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8053           // Emit data for non-overlapped data.
8054           OpenMPOffloadMappingFlags Flags =
8055               OMP_MAP_MEMBER_OF |
8056               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8057                              /*AddPtrFlag=*/false,
8058                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8059           llvm::Value *Size = nullptr;
8060           // Do bitcopy of all non-overlapped structure elements.
8061           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8062                    Component : OverlappedElements) {
8063             Address ComponentLB = Address::invalid();
8064             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8065                  Component) {
8066               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8067                 const auto *FD = dyn_cast<FieldDecl>(VD);
8068                 if (FD && FD->getType()->isLValueReferenceType()) {
8069                   const auto *ME =
8070                       cast<MemberExpr>(MC.getAssociatedExpression());
8071                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8072                   ComponentLB =
8073                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8074                           .getAddress(CGF);
8075                 } else {
8076                   ComponentLB =
8077                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8078                           .getAddress(CGF);
8079                 }
8080                 Size = CGF.Builder.CreatePtrDiff(
8081                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8082                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8083                 break;
8084               }
8085             }
8086             assert(Size && "Failed to determine structure size");
8087             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8088             CombinedInfo.BasePointers.push_back(BP.getPointer());
8089             CombinedInfo.Pointers.push_back(LB.getPointer());
8090             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8091                 Size, CGF.Int64Ty, /*isSigned=*/true));
8092             CombinedInfo.Types.push_back(Flags);
8093             CombinedInfo.Mappers.push_back(nullptr);
8094             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8095                                                                       : 1);
8096             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8097           }
8098           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8099           CombinedInfo.BasePointers.push_back(BP.getPointer());
8100           CombinedInfo.Pointers.push_back(LB.getPointer());
8101           Size = CGF.Builder.CreatePtrDiff(
8102               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8103               CGF.EmitCastToVoidPtr(LB.getPointer()));
8104           CombinedInfo.Sizes.push_back(
8105               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8106           CombinedInfo.Types.push_back(Flags);
8107           CombinedInfo.Mappers.push_back(nullptr);
8108           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8109                                                                     : 1);
8110           break;
8111         }
8112         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8113         if (!IsMemberPointerOrAddr ||
8114             (Next == CE && MapType != OMPC_MAP_unknown)) {
8115           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8116           CombinedInfo.BasePointers.push_back(BP.getPointer());
8117           CombinedInfo.Pointers.push_back(LB.getPointer());
8118           CombinedInfo.Sizes.push_back(
8119               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8120           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8121                                                                     : 1);
8122 
8123           // If Mapper is valid, the last component inherits the mapper.
8124           bool HasMapper = Mapper && Next == CE;
8125           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8126 
8127           // We need to add a pointer flag for each map that comes from the
8128           // same expression except for the first one. We also need to signal
8129           // this map is the first one that relates with the current capture
8130           // (there is a set of entries for each capture).
8131           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8132               MapType, MapModifiers, MotionModifiers, IsImplicit,
8133               !IsExpressionFirstInfo || RequiresReference ||
8134                   FirstPointerInComplexData || IsMemberReference,
8135               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8136 
8137           if (!IsExpressionFirstInfo || IsMemberReference) {
8138             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8139             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8140             if (IsPointer || (IsMemberReference && Next != CE))
8141               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8142                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8143 
8144             if (ShouldBeMemberOf) {
8145               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8146               // should be later updated with the correct value of MEMBER_OF.
8147               Flags |= OMP_MAP_MEMBER_OF;
8148               // From now on, all subsequent PTR_AND_OBJ entries should not be
8149               // marked as MEMBER_OF.
8150               ShouldBeMemberOf = false;
8151             }
8152           }
8153 
8154           CombinedInfo.Types.push_back(Flags);
8155         }
8156 
8157         // If we have encountered a member expression so far, keep track of the
8158         // mapped member. If the parent is "*this", then the value declaration
8159         // is nullptr.
8160         if (EncounteredME) {
8161           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8162           unsigned FieldIndex = FD->getFieldIndex();
8163 
8164           // Update info about the lowest and highest elements for this struct
8165           if (!PartialStruct.Base.isValid()) {
8166             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8167             if (IsFinalArraySection) {
8168               Address HB =
8169                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8170                       .getAddress(CGF);
8171               PartialStruct.HighestElem = {FieldIndex, HB};
8172             } else {
8173               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8174             }
8175             PartialStruct.Base = BP;
8176             PartialStruct.LB = BP;
8177           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8178             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8179           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8180             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8181           }
8182         }
8183 
8184         // Need to emit combined struct for array sections.
8185         if (IsFinalArraySection || IsNonContiguous)
8186           PartialStruct.IsArraySection = true;
8187 
8188         // If we have a final array section, we are done with this expression.
8189         if (IsFinalArraySection)
8190           break;
8191 
8192         // The pointer becomes the base for the next element.
8193         if (Next != CE)
8194           BP = IsMemberReference ? LowestElem : LB;
8195 
8196         IsExpressionFirstInfo = false;
8197         IsCaptureFirstInfo = false;
8198         FirstPointerInComplexData = false;
8199         IsPrevMemberReference = IsMemberReference;
8200       } else if (FirstPointerInComplexData) {
8201         QualType Ty = Components.rbegin()
8202                           ->getAssociatedDeclaration()
8203                           ->getType()
8204                           .getNonReferenceType();
8205         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8206         FirstPointerInComplexData = false;
8207       }
8208     }
8209     // If ran into the whole component - allocate the space for the whole
8210     // record.
8211     if (!EncounteredME)
8212       PartialStruct.HasCompleteRecord = true;
8213 
8214     if (!IsNonContiguous)
8215       return;
8216 
8217     const ASTContext &Context = CGF.getContext();
8218 
8219     // For supporting stride in array section, we need to initialize the first
8220     // dimension size as 1, first offset as 0, and first count as 1
8221     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8222     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8223     MapValuesArrayTy CurStrides;
8224     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8225     uint64_t ElementTypeSize;
8226 
8227     // Collect Size information for each dimension and get the element size as
8228     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8229     // should be [10, 10] and the first stride is 4 btyes.
8230     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8231          Components) {
8232       const Expr *AssocExpr = Component.getAssociatedExpression();
8233       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8234 
8235       if (!OASE)
8236         continue;
8237 
8238       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8239       auto *CAT = Context.getAsConstantArrayType(Ty);
8240       auto *VAT = Context.getAsVariableArrayType(Ty);
8241 
8242       // We need all the dimension size except for the last dimension.
8243       assert((VAT || CAT || &Component == &*Components.begin()) &&
8244              "Should be either ConstantArray or VariableArray if not the "
8245              "first Component");
8246 
8247       // Get element size if CurStrides is empty.
8248       if (CurStrides.empty()) {
8249         const Type *ElementType = nullptr;
8250         if (CAT)
8251           ElementType = CAT->getElementType().getTypePtr();
8252         else if (VAT)
8253           ElementType = VAT->getElementType().getTypePtr();
8254         else
8255           assert(&Component == &*Components.begin() &&
8256                  "Only expect pointer (non CAT or VAT) when this is the "
8257                  "first Component");
8258         // If ElementType is null, then it means the base is a pointer
8259         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8260         // for next iteration.
8261         if (ElementType) {
8262           // For the case that having pointer as base, we need to remove one
8263           // level of indirection.
8264           if (&Component != &*Components.begin())
8265             ElementType = ElementType->getPointeeOrArrayElementType();
8266           ElementTypeSize =
8267               Context.getTypeSizeInChars(ElementType).getQuantity();
8268           CurStrides.push_back(
8269               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8270         }
8271       }
8272       // Get dimension value except for the last dimension since we don't need
8273       // it.
8274       if (DimSizes.size() < Components.size() - 1) {
8275         if (CAT)
8276           DimSizes.push_back(llvm::ConstantInt::get(
8277               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8278         else if (VAT)
8279           DimSizes.push_back(CGF.Builder.CreateIntCast(
8280               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8281               /*IsSigned=*/false));
8282       }
8283     }
8284 
8285     // Skip the dummy dimension since we have already have its information.
8286     auto DI = DimSizes.begin() + 1;
8287     // Product of dimension.
8288     llvm::Value *DimProd =
8289         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8290 
8291     // Collect info for non-contiguous. Notice that offset, count, and stride
8292     // are only meaningful for array-section, so we insert a null for anything
8293     // other than array-section.
8294     // Also, the size of offset, count, and stride are not the same as
8295     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8296     // count, and stride are the same as the number of non-contiguous
8297     // declaration in target update to/from clause.
8298     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8299          Components) {
8300       const Expr *AssocExpr = Component.getAssociatedExpression();
8301 
8302       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8303         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8304             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8305             /*isSigned=*/false);
8306         CurOffsets.push_back(Offset);
8307         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8308         CurStrides.push_back(CurStrides.back());
8309         continue;
8310       }
8311 
8312       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8313 
8314       if (!OASE)
8315         continue;
8316 
8317       // Offset
8318       const Expr *OffsetExpr = OASE->getLowerBound();
8319       llvm::Value *Offset = nullptr;
8320       if (!OffsetExpr) {
8321         // If offset is absent, then we just set it to zero.
8322         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8323       } else {
8324         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8325                                            CGF.Int64Ty,
8326                                            /*isSigned=*/false);
8327       }
8328       CurOffsets.push_back(Offset);
8329 
8330       // Count
8331       const Expr *CountExpr = OASE->getLength();
8332       llvm::Value *Count = nullptr;
8333       if (!CountExpr) {
8334         // In Clang, once a high dimension is an array section, we construct all
8335         // the lower dimension as array section, however, for case like
8336         // arr[0:2][2], Clang construct the inner dimension as an array section
8337         // but it actually is not in an array section form according to spec.
8338         if (!OASE->getColonLocFirst().isValid() &&
8339             !OASE->getColonLocSecond().isValid()) {
8340           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8341         } else {
8342           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8343           // When the length is absent it defaults to ⌈(size −
8344           // lower-bound)/stride⌉, where size is the size of the array
8345           // dimension.
8346           const Expr *StrideExpr = OASE->getStride();
8347           llvm::Value *Stride =
8348               StrideExpr
8349                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8350                                               CGF.Int64Ty, /*isSigned=*/false)
8351                   : nullptr;
8352           if (Stride)
8353             Count = CGF.Builder.CreateUDiv(
8354                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8355           else
8356             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8357         }
8358       } else {
8359         Count = CGF.EmitScalarExpr(CountExpr);
8360       }
8361       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8362       CurCounts.push_back(Count);
8363 
8364       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8365       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8366       //              Offset      Count     Stride
8367       //    D0          0           1         4    (int)    <- dummy dimension
8368       //    D1          0           2         8    (2 * (1) * 4)
8369       //    D2          1           2         20   (1 * (1 * 5) * 4)
8370       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8371       const Expr *StrideExpr = OASE->getStride();
8372       llvm::Value *Stride =
8373           StrideExpr
8374               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8375                                           CGF.Int64Ty, /*isSigned=*/false)
8376               : nullptr;
8377       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8378       if (Stride)
8379         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8380       else
8381         CurStrides.push_back(DimProd);
8382       if (DI != DimSizes.end())
8383         ++DI;
8384     }
8385 
8386     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8387     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8388     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8389   }
8390 
8391   /// Return the adjusted map modifiers if the declaration a capture refers to
8392   /// appears in a first-private clause. This is expected to be used only with
8393   /// directives that start with 'target'.
8394   MappableExprsHandler::OpenMPOffloadMappingFlags
8395   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8396     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8397 
8398     // A first private variable captured by reference will use only the
8399     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8400     // declaration is known as first-private in this handler.
8401     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8402       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8403         return MappableExprsHandler::OMP_MAP_TO |
8404                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8405       return MappableExprsHandler::OMP_MAP_PRIVATE |
8406              MappableExprsHandler::OMP_MAP_TO;
8407     }
8408     return MappableExprsHandler::OMP_MAP_TO |
8409            MappableExprsHandler::OMP_MAP_FROM;
8410   }
8411 
8412   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8413     // Rotate by getFlagMemberOffset() bits.
8414     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8415                                                   << getFlagMemberOffset());
8416   }
8417 
8418   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8419                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8420     // If the entry is PTR_AND_OBJ but has not been marked with the special
8421     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8422     // marked as MEMBER_OF.
8423     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8424         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8425       return;
8426 
8427     // Reset the placeholder value to prepare the flag for the assignment of the
8428     // proper MEMBER_OF value.
8429     Flags &= ~OMP_MAP_MEMBER_OF;
8430     Flags |= MemberOfFlag;
8431   }
8432 
8433   void getPlainLayout(const CXXRecordDecl *RD,
8434                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8435                       bool AsBase) const {
8436     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8437 
8438     llvm::StructType *St =
8439         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8440 
8441     unsigned NumElements = St->getNumElements();
8442     llvm::SmallVector<
8443         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8444         RecordLayout(NumElements);
8445 
8446     // Fill bases.
8447     for (const auto &I : RD->bases()) {
8448       if (I.isVirtual())
8449         continue;
8450       const auto *Base = I.getType()->getAsCXXRecordDecl();
8451       // Ignore empty bases.
8452       if (Base->isEmpty() || CGF.getContext()
8453                                  .getASTRecordLayout(Base)
8454                                  .getNonVirtualSize()
8455                                  .isZero())
8456         continue;
8457 
8458       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8459       RecordLayout[FieldIndex] = Base;
8460     }
8461     // Fill in virtual bases.
8462     for (const auto &I : RD->vbases()) {
8463       const auto *Base = I.getType()->getAsCXXRecordDecl();
8464       // Ignore empty bases.
8465       if (Base->isEmpty())
8466         continue;
8467       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8468       if (RecordLayout[FieldIndex])
8469         continue;
8470       RecordLayout[FieldIndex] = Base;
8471     }
8472     // Fill in all the fields.
8473     assert(!RD->isUnion() && "Unexpected union.");
8474     for (const auto *Field : RD->fields()) {
8475       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8476       // will fill in later.)
8477       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8478         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8479         RecordLayout[FieldIndex] = Field;
8480       }
8481     }
8482     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8483              &Data : RecordLayout) {
8484       if (Data.isNull())
8485         continue;
8486       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8487         getPlainLayout(Base, Layout, /*AsBase=*/true);
8488       else
8489         Layout.push_back(Data.get<const FieldDecl *>());
8490     }
8491   }
8492 
8493   /// Generate all the base pointers, section pointers, sizes, map types, and
8494   /// mappers for the extracted mappable expressions (all included in \a
8495   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8496   /// pair of the relevant declaration and index where it occurs is appended to
8497   /// the device pointers info array.
8498   void generateAllInfoForClauses(
8499       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8500       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8501           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8502     // We have to process the component lists that relate with the same
8503     // declaration in a single chunk so that we can generate the map flags
8504     // correctly. Therefore, we organize all lists in a map.
8505     enum MapKind { Present, Allocs, Other, Total };
8506     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8507                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8508         Info;
8509 
8510     // Helper function to fill the information map for the different supported
8511     // clauses.
8512     auto &&InfoGen =
8513         [&Info, &SkipVarSet](
8514             const ValueDecl *D, MapKind Kind,
8515             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8516             OpenMPMapClauseKind MapType,
8517             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8518             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8519             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8520             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8521           if (SkipVarSet.contains(D))
8522             return;
8523           auto It = Info.find(D);
8524           if (It == Info.end())
8525             It = Info
8526                      .insert(std::make_pair(
8527                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8528                      .first;
8529           It->second[Kind].emplace_back(
8530               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8531               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8532         };
8533 
8534     for (const auto *Cl : Clauses) {
8535       const auto *C = dyn_cast<OMPMapClause>(Cl);
8536       if (!C)
8537         continue;
8538       MapKind Kind = Other;
8539       if (!C->getMapTypeModifiers().empty() &&
8540           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8541             return K == OMPC_MAP_MODIFIER_present;
8542           }))
8543         Kind = Present;
8544       else if (C->getMapType() == OMPC_MAP_alloc)
8545         Kind = Allocs;
8546       const auto *EI = C->getVarRefs().begin();
8547       for (const auto L : C->component_lists()) {
8548         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8549         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8550                 C->getMapTypeModifiers(), llvm::None,
8551                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8552                 E);
8553         ++EI;
8554       }
8555     }
8556     for (const auto *Cl : Clauses) {
8557       const auto *C = dyn_cast<OMPToClause>(Cl);
8558       if (!C)
8559         continue;
8560       MapKind Kind = Other;
8561       if (!C->getMotionModifiers().empty() &&
8562           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8563             return K == OMPC_MOTION_MODIFIER_present;
8564           }))
8565         Kind = Present;
8566       const auto *EI = C->getVarRefs().begin();
8567       for (const auto L : C->component_lists()) {
8568         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8569                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8570                 C->isImplicit(), std::get<2>(L), *EI);
8571         ++EI;
8572       }
8573     }
8574     for (const auto *Cl : Clauses) {
8575       const auto *C = dyn_cast<OMPFromClause>(Cl);
8576       if (!C)
8577         continue;
8578       MapKind Kind = Other;
8579       if (!C->getMotionModifiers().empty() &&
8580           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8581             return K == OMPC_MOTION_MODIFIER_present;
8582           }))
8583         Kind = Present;
8584       const auto *EI = C->getVarRefs().begin();
8585       for (const auto L : C->component_lists()) {
8586         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8587                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8588                 C->isImplicit(), std::get<2>(L), *EI);
8589         ++EI;
8590       }
8591     }
8592 
8593     // Look at the use_device_ptr clause information and mark the existing map
8594     // entries as such. If there is no map information for an entry in the
8595     // use_device_ptr list, we create one with map type 'alloc' and zero size
8596     // section. It is the user fault if that was not mapped before. If there is
8597     // no map information and the pointer is a struct member, then we defer the
8598     // emission of that entry until the whole struct has been processed.
8599     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8600                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8601         DeferredInfo;
8602     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8603 
8604     for (const auto *Cl : Clauses) {
8605       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8606       if (!C)
8607         continue;
8608       for (const auto L : C->component_lists()) {
8609         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8610             std::get<1>(L);
8611         assert(!Components.empty() &&
8612                "Not expecting empty list of components!");
8613         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8614         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8615         const Expr *IE = Components.back().getAssociatedExpression();
8616         // If the first component is a member expression, we have to look into
8617         // 'this', which maps to null in the map of map information. Otherwise
8618         // look directly for the information.
8619         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8620 
8621         // We potentially have map information for this declaration already.
8622         // Look for the first set of components that refer to it.
8623         if (It != Info.end()) {
8624           bool Found = false;
8625           for (auto &Data : It->second) {
8626             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8627               return MI.Components.back().getAssociatedDeclaration() == VD;
8628             });
8629             // If we found a map entry, signal that the pointer has to be
8630             // returned and move on to the next declaration. Exclude cases where
8631             // the base pointer is mapped as array subscript, array section or
8632             // array shaping. The base address is passed as a pointer to base in
8633             // this case and cannot be used as a base for use_device_ptr list
8634             // item.
8635             if (CI != Data.end()) {
8636               auto PrevCI = std::next(CI->Components.rbegin());
8637               const auto *VarD = dyn_cast<VarDecl>(VD);
8638               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8639                   isa<MemberExpr>(IE) ||
8640                   !VD->getType().getNonReferenceType()->isPointerType() ||
8641                   PrevCI == CI->Components.rend() ||
8642                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8643                   VarD->hasLocalStorage()) {
8644                 CI->ReturnDevicePointer = true;
8645                 Found = true;
8646                 break;
8647               }
8648             }
8649           }
8650           if (Found)
8651             continue;
8652         }
8653 
8654         // We didn't find any match in our map information - generate a zero
8655         // size array section - if the pointer is a struct member we defer this
8656         // action until the whole struct has been processed.
8657         if (isa<MemberExpr>(IE)) {
8658           // Insert the pointer into Info to be processed by
8659           // generateInfoForComponentList. Because it is a member pointer
8660           // without a pointee, no entry will be generated for it, therefore
8661           // we need to generate one after the whole struct has been processed.
8662           // Nonetheless, generateInfoForComponentList must be called to take
8663           // the pointer into account for the calculation of the range of the
8664           // partial struct.
8665           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8666                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8667                   nullptr);
8668           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8669         } else {
8670           llvm::Value *Ptr =
8671               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8672           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8673           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8674           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8675           UseDevicePtrCombinedInfo.Sizes.push_back(
8676               llvm::Constant::getNullValue(CGF.Int64Ty));
8677           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8678           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8679         }
8680       }
8681     }
8682 
8683     // Look at the use_device_addr clause information and mark the existing map
8684     // entries as such. If there is no map information for an entry in the
8685     // use_device_addr list, we create one with map type 'alloc' and zero size
8686     // section. It is the user fault if that was not mapped before. If there is
8687     // no map information and the pointer is a struct member, then we defer the
8688     // emission of that entry until the whole struct has been processed.
8689     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8690     for (const auto *Cl : Clauses) {
8691       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8692       if (!C)
8693         continue;
8694       for (const auto L : C->component_lists()) {
8695         assert(!std::get<1>(L).empty() &&
8696                "Not expecting empty list of components!");
8697         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8698         if (!Processed.insert(VD).second)
8699           continue;
8700         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8701         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8702         // If the first component is a member expression, we have to look into
8703         // 'this', which maps to null in the map of map information. Otherwise
8704         // look directly for the information.
8705         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8706 
8707         // We potentially have map information for this declaration already.
8708         // Look for the first set of components that refer to it.
8709         if (It != Info.end()) {
8710           bool Found = false;
8711           for (auto &Data : It->second) {
8712             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8713               return MI.Components.back().getAssociatedDeclaration() == VD;
8714             });
8715             // If we found a map entry, signal that the pointer has to be
8716             // returned and move on to the next declaration.
8717             if (CI != Data.end()) {
8718               CI->ReturnDevicePointer = true;
8719               Found = true;
8720               break;
8721             }
8722           }
8723           if (Found)
8724             continue;
8725         }
8726 
8727         // We didn't find any match in our map information - generate a zero
8728         // size array section - if the pointer is a struct member we defer this
8729         // action until the whole struct has been processed.
8730         if (isa<MemberExpr>(IE)) {
8731           // Insert the pointer into Info to be processed by
8732           // generateInfoForComponentList. Because it is a member pointer
8733           // without a pointee, no entry will be generated for it, therefore
8734           // we need to generate one after the whole struct has been processed.
8735           // Nonetheless, generateInfoForComponentList must be called to take
8736           // the pointer into account for the calculation of the range of the
8737           // partial struct.
8738           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8739                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8740                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8741           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8742         } else {
8743           llvm::Value *Ptr;
8744           if (IE->isGLValue())
8745             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8746           else
8747             Ptr = CGF.EmitScalarExpr(IE);
8748           CombinedInfo.Exprs.push_back(VD);
8749           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8750           CombinedInfo.Pointers.push_back(Ptr);
8751           CombinedInfo.Sizes.push_back(
8752               llvm::Constant::getNullValue(CGF.Int64Ty));
8753           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8754           CombinedInfo.Mappers.push_back(nullptr);
8755         }
8756       }
8757     }
8758 
8759     for (const auto &Data : Info) {
8760       StructRangeInfoTy PartialStruct;
8761       // Temporary generated information.
8762       MapCombinedInfoTy CurInfo;
8763       const Decl *D = Data.first;
8764       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8765       for (const auto &M : Data.second) {
8766         for (const MapInfo &L : M) {
8767           assert(!L.Components.empty() &&
8768                  "Not expecting declaration with no component lists.");
8769 
8770           // Remember the current base pointer index.
8771           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8772           CurInfo.NonContigInfo.IsNonContiguous =
8773               L.Components.back().isNonContiguous();
8774           generateInfoForComponentList(
8775               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8776               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8777               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8778 
8779           // If this entry relates with a device pointer, set the relevant
8780           // declaration and add the 'return pointer' flag.
8781           if (L.ReturnDevicePointer) {
8782             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8783                    "Unexpected number of mapped base pointers.");
8784 
8785             const ValueDecl *RelevantVD =
8786                 L.Components.back().getAssociatedDeclaration();
8787             assert(RelevantVD &&
8788                    "No relevant declaration related with device pointer??");
8789 
8790             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8791                 RelevantVD);
8792             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8793           }
8794         }
8795       }
8796 
8797       // Append any pending zero-length pointers which are struct members and
8798       // used with use_device_ptr or use_device_addr.
8799       auto CI = DeferredInfo.find(Data.first);
8800       if (CI != DeferredInfo.end()) {
8801         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8802           llvm::Value *BasePtr;
8803           llvm::Value *Ptr;
8804           if (L.ForDeviceAddr) {
8805             if (L.IE->isGLValue())
8806               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8807             else
8808               Ptr = this->CGF.EmitScalarExpr(L.IE);
8809             BasePtr = Ptr;
8810             // Entry is RETURN_PARAM. Also, set the placeholder value
8811             // MEMBER_OF=FFFF so that the entry is later updated with the
8812             // correct value of MEMBER_OF.
8813             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8814           } else {
8815             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8816             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8817                                              L.IE->getExprLoc());
8818             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8819             // placeholder value MEMBER_OF=FFFF so that the entry is later
8820             // updated with the correct value of MEMBER_OF.
8821             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8822                                     OMP_MAP_MEMBER_OF);
8823           }
8824           CurInfo.Exprs.push_back(L.VD);
8825           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8826           CurInfo.Pointers.push_back(Ptr);
8827           CurInfo.Sizes.push_back(
8828               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8829           CurInfo.Mappers.push_back(nullptr);
8830         }
8831       }
8832       // If there is an entry in PartialStruct it means we have a struct with
8833       // individual members mapped. Emit an extra combined entry.
8834       if (PartialStruct.Base.isValid()) {
8835         CurInfo.NonContigInfo.Dims.push_back(0);
8836         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8837       }
8838 
8839       // We need to append the results of this capture to what we already
8840       // have.
8841       CombinedInfo.append(CurInfo);
8842     }
8843     // Append data for use_device_ptr clauses.
8844     CombinedInfo.append(UseDevicePtrCombinedInfo);
8845   }
8846 
8847 public:
8848   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8849       : CurDir(&Dir), CGF(CGF) {
8850     // Extract firstprivate clause information.
8851     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8852       for (const auto *D : C->varlists())
8853         FirstPrivateDecls.try_emplace(
8854             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8855     // Extract implicit firstprivates from uses_allocators clauses.
8856     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8857       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8858         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8859         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8860           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8861                                         /*Implicit=*/true);
8862         else if (const auto *VD = dyn_cast<VarDecl>(
8863                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8864                          ->getDecl()))
8865           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8866       }
8867     }
8868     // Extract device pointer clause information.
8869     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8870       for (auto L : C->component_lists())
8871         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8872   }
8873 
  /// Constructor for the declare mapper directive. Only the directive and the
  /// CodeGenFunction are recorded; unlike the executable-directive form, no
  /// firstprivate / uses_allocators / is_device_ptr pre-processing is needed.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8877 
8878   /// Generate code for the combined entry if we have a partially mapped struct
8879   /// and take care of the mapping flags of the arguments corresponding to
8880   /// individual struct members.
8881   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8882                          MapFlagsArrayTy &CurTypes,
8883                          const StructRangeInfoTy &PartialStruct,
8884                          const ValueDecl *VD = nullptr,
8885                          bool NotTargetParams = true) const {
8886     if (CurTypes.size() == 1 &&
8887         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8888         !PartialStruct.IsArraySection)
8889       return;
8890     Address LBAddr = PartialStruct.LowestElem.second;
8891     Address HBAddr = PartialStruct.HighestElem.second;
8892     if (PartialStruct.HasCompleteRecord) {
8893       LBAddr = PartialStruct.LB;
8894       HBAddr = PartialStruct.LB;
8895     }
8896     CombinedInfo.Exprs.push_back(VD);
8897     // Base is the base of the struct
8898     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8899     // Pointer is the address of the lowest element
8900     llvm::Value *LB = LBAddr.getPointer();
8901     CombinedInfo.Pointers.push_back(LB);
8902     // There should not be a mapper for a combined entry.
8903     CombinedInfo.Mappers.push_back(nullptr);
8904     // Size is (addr of {highest+1} element) - (addr of lowest element)
8905     llvm::Value *HB = HBAddr.getPointer();
8906     llvm::Value *HAddr =
8907         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8908     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8909     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8910     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8911     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8912                                                   /*isSigned=*/false);
8913     CombinedInfo.Sizes.push_back(Size);
8914     // Map type is always TARGET_PARAM, if generate info for captures.
8915     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8916                                                  : OMP_MAP_TARGET_PARAM);
8917     // If any element has the present modifier, then make sure the runtime
8918     // doesn't attempt to allocate the struct.
8919     if (CurTypes.end() !=
8920         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8921           return Type & OMP_MAP_PRESENT;
8922         }))
8923       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8924     // Remove TARGET_PARAM flag from the first element
8925     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8926 
8927     // All other current entries will be MEMBER_OF the combined entry
8928     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8929     // 0xFFFF in the MEMBER_OF field).
8930     OpenMPOffloadMappingFlags MemberOfFlag =
8931         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8932     for (auto &M : CurTypes)
8933       setCorrectMemberOfFlag(M, MemberOfFlag);
8934   }
8935 
8936   /// Generate all the base pointers, section pointers, sizes, map types, and
8937   /// mappers for the extracted mappable expressions (all included in \a
8938   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8939   /// pair of the relevant declaration and index where it occurs is appended to
8940   /// the device pointers info array.
8941   void generateAllInfo(
8942       MapCombinedInfoTy &CombinedInfo,
8943       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8944           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8945     assert(CurDir.is<const OMPExecutableDirective *>() &&
8946            "Expect a executable directive");
8947     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8948     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8949   }
8950 
8951   /// Generate all the base pointers, section pointers, sizes, map types, and
8952   /// mappers for the extracted map clauses of user-defined mapper (all included
8953   /// in \a CombinedInfo).
8954   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8955     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8956            "Expect a declare mapper directive");
8957     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8958     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8959   }
8960 
8961   /// Emit capture info for lambdas for variables captured by reference.
8962   void generateInfoForLambdaCaptures(
8963       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8964       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8965     const auto *RD = VD->getType()
8966                          .getCanonicalType()
8967                          .getNonReferenceType()
8968                          ->getAsCXXRecordDecl();
8969     if (!RD || !RD->isLambda())
8970       return;
8971     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8972     LValue VDLVal = CGF.MakeAddrLValue(
8973         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8974     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8975     FieldDecl *ThisCapture = nullptr;
8976     RD->getCaptureFields(Captures, ThisCapture);
8977     if (ThisCapture) {
8978       LValue ThisLVal =
8979           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8980       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8981       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8982                                  VDLVal.getPointer(CGF));
8983       CombinedInfo.Exprs.push_back(VD);
8984       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8985       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8986       CombinedInfo.Sizes.push_back(
8987           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8988                                     CGF.Int64Ty, /*isSigned=*/true));
8989       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8990                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8991       CombinedInfo.Mappers.push_back(nullptr);
8992     }
8993     for (const LambdaCapture &LC : RD->captures()) {
8994       if (!LC.capturesVariable())
8995         continue;
8996       const VarDecl *VD = LC.getCapturedVar();
8997       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8998         continue;
8999       auto It = Captures.find(VD);
9000       assert(It != Captures.end() && "Found lambda capture without field.");
9001       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9002       if (LC.getCaptureKind() == LCK_ByRef) {
9003         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9004         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9005                                    VDLVal.getPointer(CGF));
9006         CombinedInfo.Exprs.push_back(VD);
9007         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9008         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9009         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9010             CGF.getTypeSize(
9011                 VD->getType().getCanonicalType().getNonReferenceType()),
9012             CGF.Int64Ty, /*isSigned=*/true));
9013       } else {
9014         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9015         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9016                                    VDLVal.getPointer(CGF));
9017         CombinedInfo.Exprs.push_back(VD);
9018         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9019         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9020         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9021       }
9022       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9023                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9024       CombinedInfo.Mappers.push_back(nullptr);
9025     }
9026   }
9027 
9028   /// Set correct indices for lambdas captures.
9029   void adjustMemberOfForLambdaCaptures(
9030       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9031       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9032       MapFlagsArrayTy &Types) const {
9033     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9034       // Set correct member_of idx for all implicit lambda captures.
9035       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9036                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9037         continue;
9038       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9039       assert(BasePtr && "Unable to find base lambda address.");
9040       int TgtIdx = -1;
9041       for (unsigned J = I; J > 0; --J) {
9042         unsigned Idx = J - 1;
9043         if (Pointers[Idx] != BasePtr)
9044           continue;
9045         TgtIdx = Idx;
9046         break;
9047       }
9048       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9049       // All other current entries will be MEMBER_OF the combined entry
9050       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9051       // 0xFFFF in the MEMBER_OF field).
9052       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9053       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9054     }
9055   }
9056 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// Entries for a declaration listed in an is_device_ptr clause are emitted
  /// directly. Otherwise, all map-clause component lists for the captured
  /// declaration are collected, ordered (present/alloc first), grouped by
  /// overlap, and lowered through generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // Size is the size of a pointer, not of the pointee.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One tuple per component list referring to VD:
    // (components, map type, map modifiers, is-implicit, mapper, var-ref expr).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every map-clause component list that refers to VD, keeping the
    // variable-reference expression in sync via EI.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists so that entries with the 'present' modifier or 'alloc'
    // map type come first.
    // NOTE(review): HasPresent is computed from LHS's modifiers while HasAllocs
    // uses RHS's map type (and vice versa for the *R variants) — this looks
    // asymmetric; confirm it is intentional before changing.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Maps a "base" list to the lists that extend it component-by-component.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L against every later list; walk both component lists from the
      // base (reverse iteration) until they diverge.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter (fully-consumed) list is the base; the longer one is
          // recorded as an overlapping extension of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels to reach the underlying record type, then
      // collect its fields in declaration/layout order.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each overlap group: shorter lists first, then by field position
    // within the record layout.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (nested) records: the one appearing first in
            // the flattened layout orders first.
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9294 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of \a CombinedInfo's parallel arrays
  /// (Exprs, BasePointers, Pointers, Sizes, Types, Mappers) describing the
  /// implicit mapping of a capture that has no explicit map clause. The
  /// entry is always marked as a target parameter, and as implicit unless a
  /// firstprivate clause recorded otherwise.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': map the pointed-to object, so the size is that of
      // the pointee type, with the default 'tofrom' map type.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      // By-copy capture: the value itself is passed to the runtime.
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may override whether this map is implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // By-reference capture: the record field is a reference type and the
      // size is that of the referenced element type.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load through the reference so the runtime
        // receives the pointer value rather than the reference address.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9367 };
9368 } // anonymous namespace
9369 
/// For every mapped list item that is non-contiguous, build an on-stack array
/// of per-dimension descriptors (offset/count/stride) and store a pointer to
/// that array into the corresponding slot of the runtime pointers array
/// (\p Info.PointersArray), as expected by the offloading runtime.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  // Synthesize the implicit record type with the three uint64_t fields above.
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices inside descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The recorded offset/count/stride lists are consumed in reverse order
      // relative to how they were collected (note RevIdx below).
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L advances only when a descriptor was actually emitted for slot I.
    ++L;
  }
}
9437 
9438 /// Emit a string constant containing the names of the values mapped to the
9439 /// offloading runtime library.
9440 llvm::Constant *
9441 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9442                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9443   llvm::Constant *SrcLocStr;
9444   if (!MapExprs.getMapDecl()) {
9445     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9446   } else {
9447     std::string ExprName = "";
9448     if (MapExprs.getMapExpr()) {
9449       PrintingPolicy P(CGF.getContext().getLangOpts());
9450       llvm::raw_string_ostream OS(ExprName);
9451       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9452       OS.flush();
9453     } else {
9454       ExprName = MapExprs.getMapDecl()->getNameAsString();
9455     }
9456 
9457     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9458     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9459     const char *FileName = PLoc.getFilename();
9460     unsigned Line = PLoc.getLine();
9461     unsigned Column = PLoc.getColumn();
9462     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9463                                                 Line, Column);
9464   }
9465   return SrcLocStr;
9466 }
9467 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates \p Info with the base-pointer, pointer, size, map-type,
/// map-name and mapper arrays, then fills the runtime-evaluated entries with
/// stores. Sizes become a constant global when all are compile-time
/// constants, otherwise a stack array filled at run time. Map names are only
/// materialized when debug info is requested. Finally, for non-contiguous
/// maps, the per-dimension descriptors are emitted.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for the base pointers, pointers and mappers; they are
    // filled element by element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For non-contiguous entries the "size" slot carries the dimension
          // count instead of a byte size.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null i8* instead of a names array.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          // Strip PRESENT for the end call; remember that the arrays differ.
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the runtime-evaluated slots of the arrays, one element per capture.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Store the base pointer value into .offload_baseptrs[I].
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the address for use_device_ptr/addr captures if requested.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Store the pointer value into .offload_ptrs[I].
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // When any size needs runtime evaluation, the sizes array is a stack
      // temporary that must be filled with (possibly dynamic) i64 values.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only needed when there is something mapped.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9642 
namespace {
/// Option bundle consumed by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}

  /// Whether the emitted map-type array is the one meant for the end of the
  /// region (defaults to the region-begin array).
  bool ForEndCall = false;
};
} // namespace
9651 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// When \p Info has no pointers, every out-argument becomes a typed null
/// pointer. Otherwise each argument is a GEP to element 0 of the
/// corresponding array stored in \p Info.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end call, use the stripped map-type array when one was built
    // (i.e. when a 'present' modifier had to be removed).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing mapped: pass typed null pointers for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9712 
/// Check for inner distribute directive.
///
/// Given a target-family directive \p D, look inside its captured statement
/// for a nested distribute directive (directly for 'target teams', or through
/// an intervening 'teams' for plain 'target'). Returns the nested directive
/// when found, nullptr otherwise. Directive kinds that cannot legally appear
/// here hit the llvm_unreachable below.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Peel away compound statements wrapping a single child.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain a distribute directly, or 'teams' wrapping one.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Descend one more level into the 'teams' region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These target forms cannot contain a nested distribute of interest.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are not valid enclosing directives here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9821 
9822 /// Emit the user-defined mapper function. The code generation follows the
9823 /// pattern in the example below.
9824 /// \code
9825 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9826 ///                                           void *base, void *begin,
9827 ///                                           int64_t size, int64_t type,
9828 ///                                           void *name = nullptr) {
9829 ///   // Allocate space for an array section first or add a base/begin for
9830 ///   // pointer dereference.
9831 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9832 ///       !maptype.IsDelete)
9833 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9834 ///                                 size*sizeof(Ty), clearToFromMember(type));
9835 ///   // Map members.
9836 ///   for (unsigned i = 0; i < size; i++) {
9837 ///     // For each component specified by this mapper:
9838 ///     for (auto c : begin[i]->all_components) {
9839 ///       if (c.hasMapper())
9840 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9841 ///                       c.arg_type, c.arg_name);
9842 ///       else
9843 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9844 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9845 ///                                     c.arg_name);
9846 ///     }
9847 ///   }
9848 ///   // Delete the array section.
9849 ///   if (size > 1 && maptype.IsDelete)
9850 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9851 ///                                 size*sizeof(Ty), clearToFromMember(type));
9852 /// }
9853 /// \endcode
9854 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9855                                             CodeGenFunction *CGF) {
9856   if (UDMMap.count(D) > 0)
9857     return;
9858   ASTContext &C = CGM.getContext();
9859   QualType Ty = D->getType();
9860   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9861   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9862   auto *MapperVarDecl =
9863       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9864   SourceLocation Loc = D->getLocation();
9865   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9866 
9867   // Prepare mapper function arguments and attributes.
9868   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9869                               C.VoidPtrTy, ImplicitParamDecl::Other);
9870   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9871                             ImplicitParamDecl::Other);
9872   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9873                              C.VoidPtrTy, ImplicitParamDecl::Other);
9874   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9875                             ImplicitParamDecl::Other);
9876   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9877                             ImplicitParamDecl::Other);
9878   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9879                             ImplicitParamDecl::Other);
9880   FunctionArgList Args;
9881   Args.push_back(&HandleArg);
9882   Args.push_back(&BaseArg);
9883   Args.push_back(&BeginArg);
9884   Args.push_back(&SizeArg);
9885   Args.push_back(&TypeArg);
9886   Args.push_back(&NameArg);
9887   const CGFunctionInfo &FnInfo =
9888       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9889   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9890   SmallString<64> TyStr;
9891   llvm::raw_svector_ostream Out(TyStr);
9892   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9893   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9894   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9895                                     Name, &CGM.getModule());
9896   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9897   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9898   // Start the mapper function code generation.
9899   CodeGenFunction MapperCGF(CGM);
9900   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9901   // Compute the starting and end addresses of array elements.
9902   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9903       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9904       C.getPointerType(Int64Ty), Loc);
9905   // Prepare common arguments for array initiation and deletion.
9906   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9907       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9908       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9909   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9910       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9911       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9912   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9913       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9914       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9915   // Convert the size in bytes into the number of array elements.
9916   Size = MapperCGF.Builder.CreateExactUDiv(
9917       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9918   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9919       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9920   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
9921       PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
9922   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9923       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9924       C.getPointerType(Int64Ty), Loc);
9925   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9926       MapperCGF.GetAddrOfLocalVar(&NameArg),
9927       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9928 
9929   // Emit array initiation if this is an array section and \p MapType indicates
9930   // that memory allocation is required.
9931   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9932   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9933                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9934 
9935   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9936 
9937   // Emit the loop header block.
9938   MapperCGF.EmitBlock(HeadBB);
9939   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9940   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9941   // Evaluate whether the initial condition is satisfied.
9942   llvm::Value *IsEmpty =
9943       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9944   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9945   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9946 
9947   // Emit the loop body block.
9948   MapperCGF.EmitBlock(BodyBB);
9949   llvm::BasicBlock *LastBB = BodyBB;
9950   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9951       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9952   PtrPHI->addIncoming(PtrBegin, EntryBB);
9953   Address PtrCurrent =
9954       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9955                           .getAlignment()
9956                           .alignmentOfArrayElement(ElementSize));
9957   // Privatize the declared variable of mapper to be the current array element.
9958   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9959   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9960   (void)Scope.Privatize();
9961 
9962   // Get map clause information. Fill up the arrays with all mapped variables.
9963   MappableExprsHandler::MapCombinedInfoTy Info;
9964   MappableExprsHandler MEHandler(*D, MapperCGF);
9965   MEHandler.generateAllInfoForMapper(Info);
9966 
9967   // Call the runtime API __tgt_mapper_num_components to get the number of
9968   // pre-existing components.
9969   llvm::Value *OffloadingArgs[] = {Handle};
9970   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9971       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9972                                             OMPRTL___tgt_mapper_num_components),
9973       OffloadingArgs);
9974   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9975       PreviousSize,
9976       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9977 
9978   // Fill up the runtime mapper handle for all components.
9979   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9980     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9981         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9982     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9983         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9984     llvm::Value *CurSizeArg = Info.Sizes[I];
9985     llvm::Value *CurNameArg =
9986         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9987             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9988             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9989 
9990     // Extract the MEMBER_OF field from the map type.
9991     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9992     llvm::Value *MemberMapType =
9993         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9994 
9995     // Combine the map type inherited from user-defined mapper with that
9996     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9997     // bits of the \a MapType, which is the input argument of the mapper
9998     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9999     // bits of MemberMapType.
10000     // [OpenMP 5.0], 1.2.6. map-type decay.
10001     //        | alloc |  to   | from  | tofrom | release | delete
10002     // ----------------------------------------------------------
10003     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10004     // to     | alloc |  to   | alloc |   to   | release | delete
10005     // from   | alloc | alloc | from  |  from  | release | delete
10006     // tofrom | alloc |  to   | from  | tofrom | release | delete
10007     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10008         MapType,
10009         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10010                                    MappableExprsHandler::OMP_MAP_FROM));
10011     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10012     llvm::BasicBlock *AllocElseBB =
10013         MapperCGF.createBasicBlock("omp.type.alloc.else");
10014     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10015     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10016     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10017     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10018     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10019     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10020     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10021     MapperCGF.EmitBlock(AllocBB);
10022     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10023         MemberMapType,
10024         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10025                                      MappableExprsHandler::OMP_MAP_FROM)));
10026     MapperCGF.Builder.CreateBr(EndBB);
10027     MapperCGF.EmitBlock(AllocElseBB);
10028     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10029         LeftToFrom,
10030         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10031     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10032     // In case of to, clear OMP_MAP_FROM.
10033     MapperCGF.EmitBlock(ToBB);
10034     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10035         MemberMapType,
10036         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10037     MapperCGF.Builder.CreateBr(EndBB);
10038     MapperCGF.EmitBlock(ToElseBB);
10039     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10040         LeftToFrom,
10041         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10042     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10043     // In case of from, clear OMP_MAP_TO.
10044     MapperCGF.EmitBlock(FromBB);
10045     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10046         MemberMapType,
10047         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10048     // In case of tofrom, do nothing.
10049     MapperCGF.EmitBlock(EndBB);
10050     LastBB = EndBB;
10051     llvm::PHINode *CurMapType =
10052         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10053     CurMapType->addIncoming(AllocMapType, AllocBB);
10054     CurMapType->addIncoming(ToMapType, ToBB);
10055     CurMapType->addIncoming(FromMapType, FromBB);
10056     CurMapType->addIncoming(MemberMapType, ToElseBB);
10057 
10058     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10059                                      CurSizeArg, CurMapType, CurNameArg};
10060     if (Info.Mappers[I]) {
10061       // Call the corresponding mapper function.
10062       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10063           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10064       assert(MapperFunc && "Expect a valid mapper function is available.");
10065       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10066     } else {
10067       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10068       // data structure.
10069       MapperCGF.EmitRuntimeCall(
10070           OMPBuilder.getOrCreateRuntimeFunction(
10071               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10072           OffloadingArgs);
10073     }
10074   }
10075 
10076   // Update the pointer to point to the next element that needs to be mapped,
10077   // and check whether we have mapped all elements.
10078   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10079   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10080       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10081   PtrPHI->addIncoming(PtrNext, LastBB);
10082   llvm::Value *IsDone =
10083       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10084   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10085   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10086 
10087   MapperCGF.EmitBlock(ExitBB);
10088   // Emit array deletion if this is an array section and \p MapType indicates
10089   // that deletion is required.
10090   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10091                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10092 
10093   // Emit the function exit block.
10094   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10095   MapperCGF.FinishFunction();
10096   UDMMap.try_emplace(D, Fn);
10097   if (CGF) {
10098     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10099     Decls.second.push_back(D);
10100   }
10101 }
10102 
10103 /// Emit the array initialization or deletion portion for user-defined mapper
10104 /// code generation. First, it evaluates whether an array section is mapped and
10105 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10106 /// true, and \a MapType indicates to not delete this array, array
10107 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Name fragment used for all blocks/values emitted here, so init and delete
  // paths are distinguishable in the generated IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Only sections with more than one element are treated as arrays; a single
  // element does not need a separate allocation/deletion entry.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the incoming map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialization is needed either for a real array section, or for a
    // PTR_AND_OBJ entry whose base pointer differs from its begin pointer.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization happens only when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion happens only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10171 
10172 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10173     const OMPDeclareMapperDecl *D) {
10174   auto I = UDMMap.find(D);
10175   if (I != UDMMap.end())
10176     return I->second;
10177   emitUserDefinedMapper(D);
10178   return UDMMap.lookup(D);
10179 }
10180 
10181 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10182     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10183     llvm::Value *DeviceID,
10184     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10185                                      const OMPLoopDirective &D)>
10186         SizeEmitter) {
10187   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10188   const OMPExecutableDirective *TD = &D;
10189   // Get nested teams distribute kind directive, if any.
10190   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10191     TD = getNestedDistributeDirective(CGM.getContext(), D);
10192   if (!TD)
10193     return;
10194   const auto *LD = cast<OMPLoopDirective>(TD);
10195   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10196                                                          PrePostActionTy &) {
10197     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10198       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10199       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10200       CGF.EmitRuntimeCall(
10201           OMPBuilder.getOrCreateRuntimeFunction(
10202               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10203           Args);
10204     }
10205   };
10206   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10207 }
10208 
/// Emit the offloading machinery for a target region: materialize captured
/// variables, build the offloading argument arrays, invoke the appropriate
/// __tgt_target* runtime entry point, and fall back to the host version of
/// the outlined function when offloading fails or is not available.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require the target region to be wrapped in an
  // outer task so that it can execute asynchronously.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values captured by the target region into CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and the two arrays below are filled in by TargetThenGen before
  // ThenGen runs; ThenGen captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value from the __tgt_target* call means the region
    // could not be launched on the device.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture the variables: the host fallback runs in a new context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types,
  // mappers) for every capture, then dispatches to ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // The captured record fields, the captured values, and the capture list
    // are iterated in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the generated arrays to the enclosing scope for ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10536 
/// Recursively scans the statement tree rooted at \p S for OpenMP target
/// execution directives and emits a device function for each one found.
/// \p ParentName is the mangled name of the enclosing host function (or
/// ctor/dtor) and is used to identify the offload entry for each region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies this target region
    // among the offload entries.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the emitter matching the exact (combined) directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the directive kinds below is a target execution directive, so
    // they cannot reach this switch: RequiresDeviceCodegen filtered them out.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: recurse into its raw associated
  // statement to find any nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10686 
10687 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10688   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10689       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10690   if (!DevTy)
10691     return false;
10692   // Do not emit device_type(nohost) functions for the host.
10693   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10694     return true;
10695   // Do not emit device_type(host) functions for the device.
10696   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10697     return true;
10698   return false;
10699 }
10700 
10701 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10702   // If emitting code for the host, we do not process FD here. Instead we do
10703   // the normal code generation.
10704   if (!CGM.getLangOpts().OpenMPIsDevice) {
10705     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10706       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10707                                   CGM.getLangOpts().OpenMPIsDevice))
10708         return true;
10709     return false;
10710   }
10711 
10712   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10713   // Try to detect target regions in the function.
10714   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10715     StringRef Name = CGM.getMangledName(GD);
10716     scanForTargetRegionsFunctions(FD->getBody(), Name);
10717     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10718                                 CGM.getLangOpts().OpenMPIsDevice))
10719       return true;
10720   }
10721 
10722   // Do not to emit function if it is not marked as declare target.
10723   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10724          AlreadyEmittedTargetDecls.count(VD) == 0;
10725 }
10726 
10727 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10728   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10729                               CGM.getLangOpts().OpenMPIsDevice))
10730     return true;
10731 
10732   if (!CGM.getLangOpts().OpenMPIsDevice)
10733     return false;
10734 
10735   // Check if there are Ctors/Dtors in this declaration and look for target
10736   // regions in it. We use the complete variant to produce the kernel name
10737   // mangling.
10738   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10739   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10740     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10741       StringRef ParentName =
10742           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10743       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10744     }
10745     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10746       StringRef ParentName =
10747           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10748       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10749     }
10750   }
10751 
10752   // Do not to emit variable if it is not marked as declare target.
10753   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10754       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10755           cast<VarDecl>(GD.getDecl()));
10756   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10757       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10758        HasRequiresUnifiedSharedMemory)) {
10759     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10760     return true;
10761   }
10762   return false;
10763 }
10764 
/// Records the global variable \p VD (with current-side address \p Addr) in
/// the offload entries table so the runtime can match host and device copies
/// of declare target variables.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when there is no offloading at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // declare target 'to' without unified shared memory: register the
    // variable itself with its real size and linkage.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size zero marks an entry whose definition lives in
      // another translation unit.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" that points at the variable
        // and mark it compiler-used so it survives optimization.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // declare target 'link', or 'to' with unified shared memory: register a
    // pointer-sized entry rather than the data itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device, keep only the name of the incoming address; no address
      // is recorded in the entry.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10846 
10847 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10848   if (isa<FunctionDecl>(GD.getDecl()) ||
10849       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10850     return emitTargetFunctions(GD);
10851 
10852   return emitTargetGlobalVariable(GD);
10853 }
10854 
10855 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10856   for (const VarDecl *VD : DeferredGlobalVariables) {
10857     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10858         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10859     if (!Res)
10860       continue;
10861     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10862         !HasRequiresUnifiedSharedMemory) {
10863       CGM.EmitGlobal(VD);
10864     } else {
10865       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10866               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10867                HasRequiresUnifiedSharedMemory)) &&
10868              "Expected link clause or to clause with unified memory.");
10869       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10870     }
10871   }
10872 }
10873 
// Base implementation is a no-op apart from the directive-kind sanity check.
// NOTE(review): presumably specialized by device-specific runtime subclasses
// to adjust captured lambda data for target regions - confirm against the
// class hierarchy.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10879 
10880 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10881   for (const OMPClause *Clause : D->clauselists()) {
10882     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10883       HasRequiresUnifiedSharedMemory = true;
10884     } else if (const auto *AC =
10885                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10886       switch (AC->getAtomicDefaultMemOrderKind()) {
10887       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10888         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10889         break;
10890       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10891         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10892         break;
10893       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10894         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10895         break;
10896       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10897         break;
10898       }
10899     }
10900   }
10901 }
10902 
/// Returns the default atomic ordering requested by an
/// 'omp requires atomic_default_mem_order' clause (recorded by
/// processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10906 
10907 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10908                                                        LangAS &AS) {
10909   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10910     return false;
10911   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10912   switch(A->getAllocatorType()) {
10913   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10914   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10915   // Not supported, fallback to the default mem space.
10916   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10917   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10918   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10919   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10920   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10921   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10922   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10923     AS = LangAS::Default;
10924     return true;
10925   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10926     llvm_unreachable("Expected predefined allocator for the variables with the "
10927                      "static storage.");
10928   }
10929   return false;
10930 }
10931 
/// Returns true if an 'omp requires unified_shared_memory' clause was seen
/// (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10935 
10936 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10937     CodeGenModule &CGM)
10938     : CGM(CGM) {
10939   if (CGM.getLangOpts().OpenMPIsDevice) {
10940     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10941     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10942   }
10943 }
10944 
10945 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10946   if (CGM.getLangOpts().OpenMPIsDevice)
10947     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10948 }
10949 
10950 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10951   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10952     return true;
10953 
10954   const auto *D = cast<FunctionDecl>(GD.getDecl());
10955   // Do not to emit function if it is marked as declare target as it was already
10956   // emitted.
10957   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10958     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10959       if (auto *F = dyn_cast_or_null<llvm::Function>(
10960               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10961         return !F->isDeclaration();
10962       return false;
10963     }
10964     return true;
10965   }
10966 
10967   return !AlreadyEmittedTargetDecls.insert(D).second;
10968 }
10969 
/// Builds the registration function that passes the flags accumulated from
/// 'omp requires' clauses to the runtime via __tgt_register_requires.
/// Returns nullptr when no registration is needed (no offloading targets,
/// simd-only mode, device compilation, or no target entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope the CodeGenFunction so that FinishFunction completes the body
    // before the function is returned.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Forward the accumulated flags to the runtime.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11011 
11012 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11013                                     const OMPExecutableDirective &D,
11014                                     SourceLocation Loc,
11015                                     llvm::Function *OutlinedFn,
11016                                     ArrayRef<llvm::Value *> CapturedVars) {
11017   if (!CGF.HaveInsertPoint())
11018     return;
11019 
11020   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11021   CodeGenFunction::RunCleanupsScope Scope(CGF);
11022 
11023   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11024   llvm::Value *Args[] = {
11025       RTLoc,
11026       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11027       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11028   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11029   RealArgs.append(std::begin(Args), std::end(Args));
11030   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11031 
11032   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11033       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11034   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11035 }
11036 
11037 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11038                                          const Expr *NumTeams,
11039                                          const Expr *ThreadLimit,
11040                                          SourceLocation Loc) {
11041   if (!CGF.HaveInsertPoint())
11042     return;
11043 
11044   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11045 
11046   llvm::Value *NumTeamsVal =
11047       NumTeams
11048           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11049                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11050           : CGF.Builder.getInt32(0);
11051 
11052   llvm::Value *ThreadLimitVal =
11053       ThreadLimit
11054           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11055                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11056           : CGF.Builder.getInt32(0);
11057 
11058   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11059   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11060                                      ThreadLimitVal};
11061   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11062                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11063                       PushNumTeamsArgs);
11064 }
11065 
/// Emits the runtime calls that bracket an 'omp target data' region:
/// __tgt_target_data_begin_mapper before the region body and
/// __tgt_target_data_end_mapper after it, honoring the if and device
/// clauses. The begin/end callbacks capture \p Info by reference so the
/// offloading arrays created at region entry are reused at region exit.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    // Info was filled in by BeginThenGen when the data environment opened.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (guarded by the if clause when present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (again guarded by the if clause when present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11219 
/// Emits the runtime call for the stand-alone 'target enter data',
/// 'target exit data', and 'target update' directives: builds the
/// offloading arrays from the map clauses and invokes the matching
/// __tgt_target_data_* runtime entry point, wrapping the call in an
/// outer task when 'depend' or 'nowait' is present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to emit without a valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // These are filled in by TargetThenGen (below) before ThenGen — which
  // captures them by reference — is actually emitted.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: pass the 'undefined' device-ID sentinel.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is excluded by the assertion above; the
    // exhaustive list keeps -Wswitch useful when new kinds are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend' or 'nowait' requires the runtime call to run inside a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the generated arrays to the captured state ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if clause, the mapping work is only done on the 'then' path;
  // the 'else' path intentionally emits nothing.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11398 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector ("varying").
    ParamKindTy Kind = Vector;
    /// For Linear: the constant step. For LinearWithVarStride: the position
    /// of the parameter that holds the stride (see emitDeclareSimdFunction).
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; evaluates false when unset.
    llvm::APSInt Alignment;
  };
} // namespace
11409 
11410 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11411                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11412   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11413   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11414   // of that clause. The VLEN value must be power of 2.
11415   // In other case the notion of the function`s "characteristic data type" (CDT)
11416   // is used to compute the vector length.
11417   // CDT is defined in the following order:
11418   //   a) For non-void function, the CDT is the return type.
11419   //   b) If the function has any non-uniform, non-linear parameters, then the
11420   //   CDT is the type of the first such parameter.
11421   //   c) If the CDT determined by a) or b) above is struct, union, or class
11422   //   type which is pass-by-value (except for the type that maps to the
11423   //   built-in complex data type), the characteristic data type is int.
11424   //   d) If none of the above three cases is applicable, the CDT is int.
11425   // The VLEN is then determined based on the CDT and the size of vector
11426   // register of that ISA for which current vector version is generated. The
11427   // VLEN is computed using the formula below:
11428   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11429   // where vector register size specified in section 3.2.1 Registers and the
11430   // Stack Frame of original AMD64 ABI document.
11431   QualType RetType = FD->getReturnType();
11432   if (RetType.isNull())
11433     return 0;
11434   ASTContext &C = FD->getASTContext();
11435   QualType CDT;
11436   if (!RetType.isNull() && !RetType->isVoidType()) {
11437     CDT = RetType;
11438   } else {
11439     unsigned Offset = 0;
11440     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11441       if (ParamAttrs[Offset].Kind == Vector)
11442         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11443       ++Offset;
11444     }
11445     if (CDT.isNull()) {
11446       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11447         if (ParamAttrs[I + Offset].Kind == Vector) {
11448           CDT = FD->getParamDecl(I)->getType();
11449           break;
11450         }
11451       }
11452     }
11453   }
11454   if (CDT.isNull())
11455     CDT = C.IntTy;
11456   CDT = CDT->getCanonicalTypeUnqualified();
11457   if (CDT->isRecordType() || CDT->isUnionType())
11458     CDT = C.IntTy;
11459   return C.getTypeSize(CDT);
11460 }
11461 
11462 static void
11463 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11464                            const llvm::APSInt &VLENVal,
11465                            ArrayRef<ParamAttrTy> ParamAttrs,
11466                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11467   struct ISADataTy {
11468     char ISA;
11469     unsigned VecRegSize;
11470   };
11471   ISADataTy ISAData[] = {
11472       {
11473           'b', 128
11474       }, // SSE
11475       {
11476           'c', 256
11477       }, // AVX
11478       {
11479           'd', 256
11480       }, // AVX2
11481       {
11482           'e', 512
11483       }, // AVX512
11484   };
11485   llvm::SmallVector<char, 2> Masked;
11486   switch (State) {
11487   case OMPDeclareSimdDeclAttr::BS_Undefined:
11488     Masked.push_back('N');
11489     Masked.push_back('M');
11490     break;
11491   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11492     Masked.push_back('N');
11493     break;
11494   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11495     Masked.push_back('M');
11496     break;
11497   }
11498   for (char Mask : Masked) {
11499     for (const ISADataTy &Data : ISAData) {
11500       SmallString<256> Buffer;
11501       llvm::raw_svector_ostream Out(Buffer);
11502       Out << "_ZGV" << Data.ISA << Mask;
11503       if (!VLENVal) {
11504         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11505         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11506         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11507       } else {
11508         Out << VLENVal;
11509       }
11510       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11511         switch (ParamAttr.Kind){
11512         case LinearWithVarStride:
11513           Out << 's' << ParamAttr.StrideOrArg;
11514           break;
11515         case Linear:
11516           Out << 'l';
11517           if (ParamAttr.StrideOrArg != 1)
11518             Out << ParamAttr.StrideOrArg;
11519           break;
11520         case Uniform:
11521           Out << 'u';
11522           break;
11523         case Vector:
11524           Out << 'v';
11525           break;
11526         }
11527         if (!!ParamAttr.Alignment)
11528           Out << 'a' << ParamAttr.Alignment;
11529       }
11530       Out << '_' << Fn->getName();
11531       Fn->addFnAttr(Out.str());
11532     }
11533   }
11534 }
11535 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11541 
11542 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11543 ///
11544 /// TODO: Need to implement the behavior for reference marked with a
11545 /// var or no linear modifiers (1.b in the section). For this, we
11546 /// need to extend ParamKindTy to support the linear modifiers.
11547 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11548   QT = QT.getCanonicalType();
11549 
11550   if (QT->isVoidType())
11551     return false;
11552 
11553   if (Kind == ParamKindTy::Uniform)
11554     return false;
11555 
11556   if (Kind == ParamKindTy::Linear)
11557     return false;
11558 
11559   // TODO: Handle linear references with modifiers
11560 
11561   if (Kind == ParamKindTy::LinearWithVarStride)
11562     return false;
11563 
11564   return true;
11565 }
11566 
11567 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11568 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11569   QT = QT.getCanonicalType();
11570   unsigned Size = C.getTypeSize(QT);
11571 
11572   // Only scalars and complex within 16 bytes wide set PVB to true.
11573   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11574     return false;
11575 
11576   if (QT->isFloatingType())
11577     return true;
11578 
11579   if (QT->isIntegerType())
11580     return true;
11581 
11582   if (QT->isPointerType())
11583     return true;
11584 
11585   // TODO: Add support for complex types (section 3.1.2, item 2).
11586 
11587   return false;
11588 }
11589 
11590 /// Computes the lane size (LS) of a return type or of an input parameter,
11591 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11592 /// TODO: Add support for references, section 3.2.1, item 1.
11593 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11594   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11595     QualType PTy = QT.getCanonicalType()->getPointeeType();
11596     if (getAArch64PBV(PTy, C))
11597       return C.getTypeSize(PTy);
11598   }
11599   if (getAArch64PBV(QT, C))
11600     return C.getTypeSize(QT);
11601 
11602   return C.getTypeSize(C.getUIntPtrType());
11603 }
11604 
11605 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11606 // signature of the scalar function, as defined in 3.2.2 of the
11607 // AAVFABI.
11608 static std::tuple<unsigned, unsigned, bool>
11609 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11610   QualType RetType = FD->getReturnType().getCanonicalType();
11611 
11612   ASTContext &C = FD->getASTContext();
11613 
11614   bool OutputBecomesInput = false;
11615 
11616   llvm::SmallVector<unsigned, 8> Sizes;
11617   if (!RetType->isVoidType()) {
11618     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11619     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11620       OutputBecomesInput = true;
11621   }
11622   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11623     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11624     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11625   }
11626 
11627   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11628   // The LS of a function parameter / return value can only be a power
11629   // of 2, starting from 8 bits, up to 128.
11630   assert(std::all_of(Sizes.begin(), Sizes.end(),
11631                      [](unsigned Size) {
11632                        return Size == 8 || Size == 16 || Size == 32 ||
11633                               Size == 64 || Size == 128;
11634                      }) &&
11635          "Invalid size");
11636 
11637   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11638                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11639                          OutputBecomesInput);
11640 }
11641 
11642 /// Mangle the parameter part of the vector function name according to
11643 /// their OpenMP classification. The mangling function is defined in
11644 /// section 3.5 of the AAVFABI.
11645 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11646   SmallString<256> Buffer;
11647   llvm::raw_svector_ostream Out(Buffer);
11648   for (const auto &ParamAttr : ParamAttrs) {
11649     switch (ParamAttr.Kind) {
11650     case LinearWithVarStride:
11651       Out << "ls" << ParamAttr.StrideOrArg;
11652       break;
11653     case Linear:
11654       Out << 'l';
11655       // Don't print the step value if it is not present or if it is
11656       // equal to 1.
11657       if (ParamAttr.StrideOrArg != 1)
11658         Out << ParamAttr.StrideOrArg;
11659       break;
11660     case Uniform:
11661       Out << 'u';
11662       break;
11663     case Vector:
11664       Out << 'v';
11665       break;
11666     }
11667 
11668     if (!!ParamAttr.Alignment)
11669       Out << 'a' << ParamAttr.Alignment;
11670   }
11671 
11672   return std::string(Out.str());
11673 }
11674 
11675 // Function used to add the attribute. The parameter `VLEN` is
11676 // templated to allow the use of "x" when targeting scalable functions
11677 // for SVE.
11678 template <typename T>
11679 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11680                                  char ISA, StringRef ParSeq,
11681                                  StringRef MangledName, bool OutputBecomesInput,
11682                                  llvm::Function *Fn) {
11683   SmallString<256> Buffer;
11684   llvm::raw_svector_ostream Out(Buffer);
11685   Out << Prefix << ISA << LMask << VLEN;
11686   if (OutputBecomesInput)
11687     Out << "v";
11688   Out << ParSeq << "_" << MangledName;
11689   Fn->addFnAttr(Out.str());
11690 }
11691 
11692 // Helper function to generate the Advanced SIMD names depending on
11693 // the value of the NDS when simdlen is not present.
11694 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11695                                       StringRef Prefix, char ISA,
11696                                       StringRef ParSeq, StringRef MangledName,
11697                                       bool OutputBecomesInput,
11698                                       llvm::Function *Fn) {
11699   switch (NDS) {
11700   case 8:
11701     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11702                          OutputBecomesInput, Fn);
11703     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11704                          OutputBecomesInput, Fn);
11705     break;
11706   case 16:
11707     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11708                          OutputBecomesInput, Fn);
11709     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11710                          OutputBecomesInput, Fn);
11711     break;
11712   case 32:
11713     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11714                          OutputBecomesInput, Fn);
11715     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11716                          OutputBecomesInput, Fn);
11717     break;
11718   case 64:
11719   case 128:
11720     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11721                          OutputBecomesInput, Fn);
11722     break;
11723   default:
11724     llvm_unreachable("Scalar type is too wide.");
11725   }
11726 }
11727 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates a user-supplied simdlen (emitting warnings and bailing out on
/// invalid values) and then adds the mangled vector-variant names to \p Fn.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits (total vector width in [128, 2048] bits, multiple of 128).
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked ('N') and masked ('M').
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: use the scalable "x" vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11836 
11837 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11838                                               llvm::Function *Fn) {
11839   ASTContext &C = CGM.getContext();
11840   FD = FD->getMostRecentDecl();
11841   // Map params to their positions in function decl.
11842   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11843   if (isa<CXXMethodDecl>(FD))
11844     ParamPositions.try_emplace(FD, 0);
11845   unsigned ParamPos = ParamPositions.size();
11846   for (const ParmVarDecl *P : FD->parameters()) {
11847     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11848     ++ParamPos;
11849   }
11850   while (FD) {
11851     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11852       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11853       // Mark uniform parameters.
11854       for (const Expr *E : Attr->uniforms()) {
11855         E = E->IgnoreParenImpCasts();
11856         unsigned Pos;
11857         if (isa<CXXThisExpr>(E)) {
11858           Pos = ParamPositions[FD];
11859         } else {
11860           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11861                                 ->getCanonicalDecl();
11862           Pos = ParamPositions[PVD];
11863         }
11864         ParamAttrs[Pos].Kind = Uniform;
11865       }
11866       // Get alignment info.
11867       auto NI = Attr->alignments_begin();
11868       for (const Expr *E : Attr->aligneds()) {
11869         E = E->IgnoreParenImpCasts();
11870         unsigned Pos;
11871         QualType ParmTy;
11872         if (isa<CXXThisExpr>(E)) {
11873           Pos = ParamPositions[FD];
11874           ParmTy = E->getType();
11875         } else {
11876           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11877                                 ->getCanonicalDecl();
11878           Pos = ParamPositions[PVD];
11879           ParmTy = PVD->getType();
11880         }
11881         ParamAttrs[Pos].Alignment =
11882             (*NI)
11883                 ? (*NI)->EvaluateKnownConstInt(C)
11884                 : llvm::APSInt::getUnsigned(
11885                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11886                           .getQuantity());
11887         ++NI;
11888       }
11889       // Mark linear parameters.
11890       auto SI = Attr->steps_begin();
11891       auto MI = Attr->modifiers_begin();
11892       for (const Expr *E : Attr->linears()) {
11893         E = E->IgnoreParenImpCasts();
11894         unsigned Pos;
11895         // Rescaling factor needed to compute the linear parameter
11896         // value in the mangled name.
11897         unsigned PtrRescalingFactor = 1;
11898         if (isa<CXXThisExpr>(E)) {
11899           Pos = ParamPositions[FD];
11900         } else {
11901           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11902                                 ->getCanonicalDecl();
11903           Pos = ParamPositions[PVD];
11904           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11905             PtrRescalingFactor = CGM.getContext()
11906                                      .getTypeSizeInChars(P->getPointeeType())
11907                                      .getQuantity();
11908         }
11909         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11910         ParamAttr.Kind = Linear;
11911         // Assuming a stride of 1, for `linear` without modifiers.
11912         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11913         if (*SI) {
11914           Expr::EvalResult Result;
11915           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11916             if (const auto *DRE =
11917                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11918               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11919                 ParamAttr.Kind = LinearWithVarStride;
11920                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11921                     ParamPositions[StridePVD->getCanonicalDecl()]);
11922               }
11923             }
11924           } else {
11925             ParamAttr.StrideOrArg = Result.Val.getInt();
11926           }
11927         }
11928         // If we are using a linear clause on a pointer, we need to
11929         // rescale the value of linear_step with the byte size of the
11930         // pointee type.
11931         if (Linear == ParamAttr.Kind)
11932           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11933         ++SI;
11934         ++MI;
11935       }
11936       llvm::APSInt VLENVal;
11937       SourceLocation ExprLoc;
11938       const Expr *VLENExpr = Attr->getSimdlen();
11939       if (VLENExpr) {
11940         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11941         ExprLoc = VLENExpr->getExprLoc();
11942       }
11943       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11944       if (CGM.getTriple().isX86()) {
11945         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11946       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11947         unsigned VLEN = VLENVal.getExtValue();
11948         StringRef MangledName = Fn->getName();
11949         if (CGM.getTarget().hasFeature("sve"))
11950           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11951                                          MangledName, 's', 128, Fn, ExprLoc);
11952         if (CGM.getTarget().hasFeature("neon"))
11953           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11954                                          MangledName, 'n', 128, Fn, ExprLoc);
11955       }
11956     }
11957     FD = FD->getPreviousDecl();
11958   }
11959 }
11960 
11961 namespace {
11962 /// Cleanup action for doacross support.
11963 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11964 public:
11965   static const int DoacrossFinArgs = 2;
11966 
11967 private:
11968   llvm::FunctionCallee RTLFn;
11969   llvm::Value *Args[DoacrossFinArgs];
11970 
11971 public:
11972   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11973                     ArrayRef<llvm::Value *> CallArgs)
11974       : RTLFn(RTLFn) {
11975     assert(CallArgs.size() == DoacrossFinArgs);
11976     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11977   }
11978   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11979     if (!CGF.HaveInsertPoint())
11980       return;
11981     CGF.EmitRuntimeCall(RTLFn, Args);
11982   }
11983 };
11984 } // namespace
11985 
// Emits the doacross initialization for loop directive \p D: builds one
// kmp_dim descriptor per collapsed loop from \p NumIterations, calls
// __kmpc_doacross_init, and registers a cleanup that calls
// __kmpc_doacross_fini when the region is left.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  // Nothing to emit if codegen has no current insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build the kmp_dim record type the first time it is needed and
  // cache it in KmpDimTy; later calls reuse the cached type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Stack-allocate kmp_dim[num_dims] and zero it; the lower bound therefore
  // stays 0 and only upper/stride are written below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Widen/convert the iteration count to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini call as a normal+EH cleanup
  // so finalization runs on every exit path from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12056 
// Emits a doacross ordered construct for depend clause \p C: materializes
// the loop-counter vector as a kmp_int64 array and calls
// __kmpc_doacross_post for depend(source) or __kmpc_doacross_wait for
// depend(sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  // Temporary kmp_int64[num_loops] holding the per-loop counter values.
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert each counter to kmp_int64 before storing it into the array.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // depend(source) posts the iteration; depend(sink) waits on it.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
12089 
12090 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12091                                llvm::FunctionCallee Callee,
12092                                ArrayRef<llvm::Value *> Args) const {
12093   assert(Loc.isValid() && "Outlined function call location must be valid.");
12094   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12095 
12096   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12097     if (Fn->doesNotThrow()) {
12098       CGF.EmitNounwindRuntimeCall(Fn, Args);
12099       return;
12100     }
12101   }
12102   CGF.EmitRuntimeCall(Callee, Args);
12103 }
12104 
// Emits a call to outlined function \p OutlinedFn. The default
// implementation simply forwards to emitCall, which emits a nounwind call
// when the callee is known not to throw.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12110 
12111 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12112   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12113     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12114       HasEmittedDeclareTargetRegion = true;
12115 }
12116 
// Returns the address to use for \p NativeParam. The default implementation
// ignores \p TargetParam and just returns the native parameter's local
// address (presumably device-specific runtimes map between the two — this
// base version performs no translation).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12122 
// Returns the address of local variable \p VD, handling two special cases:
// variables that live in an untied task's storage (looked up in
// UntiedLocalVarsStack) and variables with an OMPAllocateDeclAttr, which
// are allocated via __kmpc_alloc and freed via a __kmpc_free cleanup.
// Returns Address::invalid() when no special handling applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If the current function is an untied task body, look up any previously
  // registered addresses for this variable.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size in bytes, rounded up to the declared
    // alignment. VLA-typed variables need a runtime size computation.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer into the task's slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw-encoded source location (Address members cannot capture it
      // directly; decoded again in Emit).
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, addr, allocator);
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied task's real storage address when present.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12226 
12227 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12228                                              const VarDecl *VD) const {
12229   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12230   if (It == FunctionToUntiedTaskStackMap.end())
12231     return false;
12232   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12233 }
12234 
12235 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12236     CodeGenModule &CGM, const OMPLoopDirective &S)
12237     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12238   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12239   if (!NeedToPush)
12240     return;
12241   NontemporalDeclsSet &DS =
12242       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12243   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12244     for (const Stmt *Ref : C->private_refs()) {
12245       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12246       const ValueDecl *VD;
12247       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12248         VD = DRE->getDecl();
12249       } else {
12250         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12251         assert((ME->isImplicitCXXThis() ||
12252                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12253                "Expected member of current class.");
12254         VD = ME->getMemberDecl();
12255       }
12256       DS.insert(VD);
12257     }
12258   }
12259 }
12260 
12261 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12262   if (!NeedToPush)
12263     return;
12264   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12265 }
12266 
12267 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12268     CodeGenFunction &CGF,
12269     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12270                           std::pair<Address, Address>> &LocalVars)
12271     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12272   if (!NeedToPush)
12273     return;
12274   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12275       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12276   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12277 }
12278 
12279 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12280   if (!NeedToPush)
12281     return;
12282   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12283 }
12284 
12285 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12286   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12287 
12288   return llvm::any_of(
12289       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12290       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12291 }
12292 
12293 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12294     const OMPExecutableDirective &S,
12295     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12296     const {
12297   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12298   // Vars in target/task regions must be excluded completely.
12299   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12300       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12301     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12302     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12303     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12304     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12305       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12306         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12307     }
12308   }
12309   // Exclude vars in private clauses.
12310   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12311     for (const Expr *Ref : C->varlists()) {
12312       if (!Ref->getType()->isScalarType())
12313         continue;
12314       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12315       if (!DRE)
12316         continue;
12317       NeedToCheckForLPCs.insert(DRE->getDecl());
12318     }
12319   }
12320   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12321     for (const Expr *Ref : C->varlists()) {
12322       if (!Ref->getType()->isScalarType())
12323         continue;
12324       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12325       if (!DRE)
12326         continue;
12327       NeedToCheckForLPCs.insert(DRE->getDecl());
12328     }
12329   }
12330   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12331     for (const Expr *Ref : C->varlists()) {
12332       if (!Ref->getType()->isScalarType())
12333         continue;
12334       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12335       if (!DRE)
12336         continue;
12337       NeedToCheckForLPCs.insert(DRE->getDecl());
12338     }
12339   }
12340   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12341     for (const Expr *Ref : C->varlists()) {
12342       if (!Ref->getType()->isScalarType())
12343         continue;
12344       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12345       if (!DRE)
12346         continue;
12347       NeedToCheckForLPCs.insert(DRE->getDecl());
12348     }
12349   }
12350   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12351     for (const Expr *Ref : C->varlists()) {
12352       if (!Ref->getType()->isScalarType())
12353         continue;
12354       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12355       if (!DRE)
12356         continue;
12357       NeedToCheckForLPCs.insert(DRE->getDecl());
12358     }
12359   }
12360   for (const Decl *VD : NeedToCheckForLPCs) {
12361     for (const LastprivateConditionalData &Data :
12362          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12363       if (Data.DeclToUniqueName.count(VD) > 0) {
12364         if (!Data.Disabled)
12365           NeedToAddForLPCsAsDisabled.insert(VD);
12366         break;
12367       }
12368     }
12369   }
12370 }
12371 
// Push-form constructor: if OpenMP >= 5.0 and directive \p S has at least
// one lastprivate(conditional:) clause, pushes a new tracking entry onto
// LastprivateConditionalStack mapping each listed declaration to a unique
// global name, together with the loop IV lvalue and the current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only conditional lastprivates participate.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each variable to a unique "pl_cond"-prefixed name used for the
    // backing globals emitted later.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12403 
// Disable-form constructor (used via the disable() factory): pushes an
// entry marked Disabled that lists the declarations for which lastprivate
// conditional analysis must be suppressed inside region \p S. Pushes
// nothing when no tracked declaration is privatized in \p S.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; store empty strings.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12422 
// Factory that builds the disable-form RAII object for region \p S,
// suppressing lastprivate conditional analysis for privatized vars inside.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12428 
12429 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12430   if (CGM.getLangOpts().OpenMP < 50)
12431     return;
12432   if (Action == ActionToDo::DisableLastprivateConditional) {
12433     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12434            "Expected list of disabled private vars.");
12435     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12436   }
12437   if (Action == ActionToDo::PushAsLastprivateConditional) {
12438     assert(
12439         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12440         "Expected list of lastprivate conditional vars.");
12441     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12442   }
12443 }
12444 
// Creates (or reuses) the private { value; char Fired; } wrapper for
// lastprivate conditional variable \p VD in the current function, resets
// the Fired flag to 0, and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the wrapper record, allocate a
    // temporary for it, and cache everything for later lookups.
    // NOTE(review): "lasprivate.conditional" looks like a typo for
    // "lastprivate.conditional", but it is a runtime-visible record name —
    // renaming it would change emitted IR type names.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered: unpack the cached (type, fields, base lvalue).
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; — the flag is set when the variable is actually updated.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12479 
12480 namespace {
12481 /// Checks if the lastprivate conditional variable is referenced in LHS.
12482 class LastprivateConditionalRefChecker final
12483     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12484   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12485   const Expr *FoundE = nullptr;
12486   const Decl *FoundD = nullptr;
12487   StringRef UniqueDeclName;
12488   LValue IVLVal;
12489   llvm::Function *FoundFn = nullptr;
12490   SourceLocation Loc;
12491 
12492 public:
12493   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12494     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12495          llvm::reverse(LPM)) {
12496       auto It = D.DeclToUniqueName.find(E->getDecl());
12497       if (It == D.DeclToUniqueName.end())
12498         continue;
12499       if (D.Disabled)
12500         return false;
12501       FoundE = E;
12502       FoundD = E->getDecl()->getCanonicalDecl();
12503       UniqueDeclName = It->second;
12504       IVLVal = D.IVLVal;
12505       FoundFn = D.Fn;
12506       break;
12507     }
12508     return FoundE == E;
12509   }
12510   bool VisitMemberExpr(const MemberExpr *E) {
12511     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12512       return false;
12513     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12514          llvm::reverse(LPM)) {
12515       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12516       if (It == D.DeclToUniqueName.end())
12517         continue;
12518       if (D.Disabled)
12519         return false;
12520       FoundE = E;
12521       FoundD = E->getMemberDecl()->getCanonicalDecl();
12522       UniqueDeclName = It->second;
12523       IVLVal = D.IVLVal;
12524       FoundFn = D.Fn;
12525       break;
12526     }
12527     return FoundE == E;
12528   }
12529   bool VisitStmt(const Stmt *S) {
12530     for (const Stmt *Child : S->children()) {
12531       if (!Child)
12532         continue;
12533       if (const auto *E = dyn_cast<Expr>(Child))
12534         if (!E->isGLValue())
12535           continue;
12536       if (Visit(Child))
12537         return true;
12538     }
12539     return false;
12540   }
12541   explicit LastprivateConditionalRefChecker(
12542       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12543       : LPM(LPM) {}
12544   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12545   getFoundData() const {
12546     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12547   }
12548 };
12549 } // namespace
12550 
// Emits the conditional update of the global "last value" for a lastprivate
// conditional variable: inside a critical section (or unguarded for simd),
// compares the global last-IV against the current IV and, when not greater,
// stores the current IV and the private value into internal globals named
// from \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Use a signed or unsigned comparison matching the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Serialize the update against other threads via a named critical region.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12637 
12638 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12639                                                          const Expr *LHS) {
12640   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12641     return;
12642   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12643   if (!Checker.Visit(LHS))
12644     return;
12645   const Expr *FoundE;
12646   const Decl *FoundD;
12647   StringRef UniqueDeclName;
12648   LValue IVLVal;
12649   llvm::Function *FoundFn;
12650   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12651       Checker.getFoundData();
12652   if (FoundFn != CGF.CurFn) {
12653     // Special codegen for inner parallel regions.
12654     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12655     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12656     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12657            "Lastprivate conditional is not found in outer region.");
12658     QualType StructTy = std::get<0>(It->getSecond());
12659     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12660     LValue PrivLVal = CGF.EmitLValue(FoundE);
12661     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12662         PrivLVal.getAddress(CGF),
12663         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12664     LValue BaseLVal =
12665         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12666     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12667     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12668                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12669                         FiredLVal, llvm::AtomicOrdering::Unordered,
12670                         /*IsVolatile=*/true, /*isInit=*/false);
12671     return;
12672   }
12673 
12674   // Private address of the lastprivate conditional in the current context.
12675   // priv_a
12676   LValue LVal = CGF.EmitLValue(FoundE);
12677   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12678                                    FoundE->getExprLoc());
12679 }
12680 
12681 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12682     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12683     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12684   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12685     return;
12686   auto Range = llvm::reverse(LastprivateConditionalStack);
12687   auto It = llvm::find_if(
12688       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12689   if (It == Range.end() || It->Fn != CGF.CurFn)
12690     return;
12691   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12692   assert(LPCI != LastprivateConditionalToTypes.end() &&
12693          "Lastprivates must be registered already.");
12694   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12695   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12696   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12697   for (const auto &Pair : It->DeclToUniqueName) {
12698     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12699     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12700       continue;
12701     auto I = LPCI->getSecond().find(Pair.first);
12702     assert(I != LPCI->getSecond().end() &&
12703            "Lastprivate must be rehistered already.");
12704     // bool Cmp = priv_a.Fired != 0;
12705     LValue BaseLVal = std::get<3>(I->getSecond());
12706     LValue FiredLVal =
12707         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12708     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12709     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12710     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12711     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12712     // if (Cmp) {
12713     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12714     CGF.EmitBlock(ThenBB);
12715     Address Addr = CGF.GetAddrOfLocalVar(VD);
12716     LValue LVal;
12717     if (VD->getType()->isReferenceType())
12718       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12719                                            AlignmentSource::Decl);
12720     else
12721       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12722                                 AlignmentSource::Decl);
12723     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12724                                      D.getBeginLoc());
12725     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12726     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12727     // }
12728   }
12729 }
12730 
12731 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12732     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12733     SourceLocation Loc) {
12734   if (CGF.getLangOpts().OpenMP < 50)
12735     return;
12736   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12737   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12738          "Unknown lastprivate conditional variable.");
12739   StringRef UniqueName = It->second;
12740   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12741   // The variable was not updated in the region - exit.
12742   if (!GV)
12743     return;
12744   LValue LPLVal = CGF.MakeAddrLValue(
12745       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12746   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12747   CGF.EmitStoreOfScalar(Res, PrivLVal);
12748 }
12749 
// CGOpenMPSIMDRuntime overrides below: in SIMD-only mode only `simd`
// constructs are lowered, so outlining for parallel/teams/task regions must
// never be requested; reaching any of these indicates an upstream bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12769 
// Synchronization/region entry points: none of these constructs (parallel,
// critical, master, masked, taskyield, taskgroup, single, ordered, barrier)
// are lowered in SIMD-only mode, so each override simply aborts.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12831 
// Worksharing-loop entry points (dispatch/static init, ordered-iteration end,
// static finish, dynamic next): unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12871 
// num_threads / proc_bind clause emission: unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12883 
// Threadprivate storage and flush: these need an OpenMP runtime library, so
// they are unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12908 
// Task and taskloop calls: unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12924 
// Reductions are the one construct the SIMD-only runtime does handle: only
// the "simple" (no runtime calls) form is allowed here, and the actual
// emission is delegated to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12933 
// Task reductions and taskwait require runtime support and are unreachable
// in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12964 
// Cancellation constructs: unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12976 
// Target offloading entry points: no device codegen in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // NOTE(review): unlike its siblings this one returns false instead of
  // aborting — presumably "not handled here" so regular global emission
  // proceeds; confirm against the CodeGenModule caller.
  return false;
}
13005 
// Teams, target-data and doacross (ordered-depend) entry points:
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13043 
// Outlined-function parameter translation: unreachable in SIMD-only mode
// since no outlined functions are ever produced.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13056