//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// such as the 'for', 'sections', and 'atomic' directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
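
// Illustrative note (a sketch, not normative): for an untied task such as
//   #pragma omp task untied
// UntiedTaskActionTy above numbers each potential resumption point with a
// part id stored through PartIDVar; Enter() emits a switch over that id so
// that, when the runtime re-invokes the task entry after a task scheduling
// point, control jumps to the matching '.untied.jmp.' block instead of
// restarting the body.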

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
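
// For example (illustrative): the implicit barrier at the end of a 'single'
// construct is encoded as OMP_IDENT_BARRIER_IMPL_SINGLE (0x140 = 0x100 |
// OMP_IDENT_BARRIER_IMPL), so the runtime can recognize it both as a generic
// implicit barrier (the 0x40 bit) and as one originating from 'single'.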

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
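
// Illustrative psource example (an assumption based on the format described
// above, not verified here): a construct in function foo() of file.c might be
// described as ";file.c;foo;10;12;;"; when no location information is
// available, the OpenMPIRBuilder falls back to the default string
// ";unknown;unknown;0;0;;".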

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
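
// For example (illustrative): a loop with 'schedule(monotonic: dynamic, 4)'
// is encoded for the runtime as
// OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic, i.e. 35 | (1 << 29);
// the chunk size itself is passed as a separate argument to the runtime.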

/// A basic class for pre- and post-actions in advanced codegen sequences for
/// OpenMP regions.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
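
// Usage sketch (illustrative): invoking a RegionCodeGenTy as CodeGen(CGF)
// runs the stored callback with its PrePostActionTy; the CleanupTy pushed
// above guarantees the action's Exit() hook fires when the scope unwinds
// (including on the EH path), while Enter() is typically called by the
// region's codegen callback itself.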

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
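
// Schematic of the IR produced above (illustrative, names abbreviated):
//
//   entry:  %isempty = icmp eq %dest.begin, %dest.end
//           br i1 %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
//   body:   %dest = phi [ %dest.begin, %entry ], [ %dest.next, %body ]
//           ; ...element initialization (default init or UDR initializer)...
//           %dest.next = getelementptr %dest, 1
//           %done = icmp eq %dest.next, %dest.end
//           br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body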

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
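
// Illustrative example (assumed user code, not from this file): for
//   #pragma omp parallel for reduction(+: a[2:8])
// the private copy only covers the section a[2]..a[9], so the pointer
// returned for the base 'a' is the private pointer shifted by the (negative)
// distance &a[0] - &a[2] computed above; indexing it with the original
// subscripts then lands in the private storage.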

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
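
// Illustrative example (assumed user code): a declaration such as
//   #pragma omp declare reduction(myplus : T : omp_out += omp_in) \
//       initializer(omp_priv = T())
// is lowered through emitCombinerOrInitializer into internal '.omp_combiner.'
// and '.omp_initializer.' helpers taking pointers to the two operands, with
// omp_in/omp_out (resp. omp_orig/omp_priv) privatized to the pointed-to
// memory; see emitUserDefinedReduction below.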

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need to push
    // and pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}
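
// For example (separators are platform-specific, so treat as illustrative):
// with the default host separators this yields names like "main.omp_outlined"
// for a parallel region outlined from main().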

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1341     Elem.second.ServiceInsertPt = nullptr;
1342     Ptr->eraseFromParent();
1343   }
1344 }
1345 
1346 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1347                                                   SourceLocation Loc,
1348                                                   SmallString<128> &Buffer) {
1349   llvm::raw_svector_ostream OS(Buffer);
1350   // Build debug location
1351   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1352   OS << ";" << PLoc.getFilename() << ";";
1353   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1354     OS << FD->getQualifiedNameAsString();
1355   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1356   return OS.str();
1357 }
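
// For instance, a use in function 'main' at example.c:12:3 yields the ident
// string ";example.c;main;12;3;;" (';'-separated fields: filename, function
// name, line, column).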
1358 
1359 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1360                                                  SourceLocation Loc,
1361                                                  unsigned Flags, bool EmitLoc) {
1362   uint32_t SrcLocStrSize;
1363   llvm::Constant *SrcLocStr;
1364   if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1365                        llvm::codegenoptions::NoDebugInfo) ||
1366       Loc.isInvalid()) {
1367     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1368   } else {
1369     std::string FunctionName;
1370     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1371       FunctionName = FD->getQualifiedNameAsString();
1372     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1373     const char *FileName = PLoc.getFilename();
1374     unsigned Line = PLoc.getLine();
1375     unsigned Column = PLoc.getColumn();
1376     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1377                                                 Column, SrcLocStrSize);
1378   }
1379   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1380   return OMPBuilder.getOrCreateIdent(
1381       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1382 }
1383 
1384 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1385                                           SourceLocation Loc) {
1386   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1387   // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
1388   // as the clang invariants used below might be broken.
1389   if (CGM.getLangOpts().OpenMPIRBuilder) {
1390     SmallString<128> Buffer;
1391     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1392     uint32_t SrcLocStrSize;
1393     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1394         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1395     return OMPBuilder.getOrCreateThreadID(
1396         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1397   }
1398 
1399   llvm::Value *ThreadID = nullptr;
1400   // Check whether we've already cached a load of the thread id in this
1401   // function.
1402   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1403   if (I != OpenMPLocThreadIDMap.end()) {
1404     ThreadID = I->second.ThreadID;
1405     if (ThreadID != nullptr)
1406       return ThreadID;
1407   }
1408   // If exceptions are enabled, do not use the parameter, to avoid a crash.
1409   if (auto *OMPRegionInfo =
1410           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1411     if (OMPRegionInfo->getThreadIDVariable()) {
1412       // Check if this is an outlined function with thread id passed as argument.
1413       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1414       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1415       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1416           !CGF.getLangOpts().CXXExceptions ||
1417           CGF.Builder.GetInsertBlock() == TopBlock ||
1418           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1419           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1420               TopBlock ||
1421           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1422               CGF.Builder.GetInsertBlock()) {
1423         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1424         // If the value was loaded in the entry block, cache it and use it
1425         // everywhere in the function.
1426         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1427           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1428           Elem.second.ThreadID = ThreadID;
1429         }
1430         return ThreadID;
1431       }
1432     }
1433   }
1434 
1435   // This is not an outlined function region - we need to call kmp_int32
1436   // __kmpc_global_thread_num(ident_t *loc).
1437   // Generate the thread id value and cache it for use across the
1438   // function.
1439   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1440   if (!Elem.second.ServiceInsertPt)
1441     setLocThreadIdInsertPt(CGF);
1442   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1443   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1444   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1445   llvm::CallInst *Call = CGF.Builder.CreateCall(
1446       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1447                                             OMPRTL___kmpc_global_thread_num),
1448       emitUpdateLocation(CGF, Loc));
1449   Call->setCallingConv(CGF.getRuntimeCC());
1450   Elem.second.ThreadID = Call;
1451   return Call;
1452 }
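
// Outside an outlined region the code emitted at the service insertion point
// is shaped roughly like (names illustrative):
//   %1 = call i32 @__kmpc_global_thread_num(ptr @<ident>)
//   %svcpt = bitcast i32 undef to i32   ; marker from setLocThreadIdInsertPt,
//                                       ; erased in clearLocThreadIdInsertPt
// and the call result is cached in OpenMPLocThreadIDMap for reuse by later
// thread-id queries in the same function.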
1453 
1454 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1455   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1457     clearLocThreadIdInsertPt(CGF);
1458     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1459   }
1460   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1461     for (const auto *D : FunctionUDRMap[CGF.CurFn])
1462       UDRMap.erase(D);
1463     FunctionUDRMap.erase(CGF.CurFn);
1464   }
1465   auto I = FunctionUDMMap.find(CGF.CurFn);
1466   if (I != FunctionUDMMap.end()) {
1467     for (const auto *D : I->second)
1468       UDMMap.erase(D);
1469     FunctionUDMMap.erase(I);
1470   }
1471   LastprivateConditionalToTypes.erase(CGF.CurFn);
1472   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1473 }
1474 
1475 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1476   return OMPBuilder.IdentPtr;
1477 }
1478 
1479 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1480   if (!Kmpc_MicroTy) {
1481     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1482     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1483                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1484     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1485   }
1486   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1487 }
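
// E.g., an outlined parallel body passed to __kmpc_fork_call is called through
// this pointer type with the microtask shape
//   void <outlined>(kmp_int32 *global_tid, kmp_int32 *bound_tid, <captures>...)
// where <outlined> and the capture list are illustrative placeholders.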
1488 
1489 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1490 convertDeviceClause(const VarDecl *VD) {
1491   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1492       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1493   if (!DevTy)
1494     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1495 
1496   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1497   case OMPDeclareTargetDeclAttr::DT_Host:
1498     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1500   case OMPDeclareTargetDeclAttr::DT_NoHost:
1501     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1503   case OMPDeclareTargetDeclAttr::DT_Any:
1504     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1506   default:
1507     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1509   }
1510 }
1511 
1512 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1513 convertCaptureClause(const VarDecl *VD) {
1514   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1515       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1516   if (!MapType)
1517     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1518   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1519   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1520     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1522   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1523     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1525   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1526     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1528   default:
1529     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1531   }
1532 }
1533 
1534 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1535     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1536     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1537 
1538   auto FileInfoCallBack = [&]() {
1539     SourceManager &SM = CGM.getContext().getSourceManager();
1540     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1541 
1542     llvm::sys::fs::UniqueID ID;
1543     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1544       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1545     }
1546 
1547     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1548   };
1549 
1550   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1551 }
1552 
1553 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1554   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1555 
1556   auto LinkageForVariable = [&VD, this]() {
1557     return CGM.getLLVMLinkageVarDefinition(VD);
1558   };
1559 
1560   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1561 
1562   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563       CGM.getContext().getPointerType(VD->getType()));
1564   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1565       convertCaptureClause(VD), convertDeviceClause(VD),
1566       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1567       VD->isExternallyVisible(),
1568       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1569                                   VD->getCanonicalDecl()->getBeginLoc()),
1570       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1571       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1572       LinkageForVariable);
1573 
1574   if (!addr)
1575     return Address::invalid();
1576   return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1577 }
1578 
1579 llvm::Constant *
1580 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1581   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1582          !CGM.getContext().getTargetInfo().isTLSSupported());
1583   // Look up the entry, lazily creating it if necessary.
1584   std::string Suffix = getName({"cache", ""});
1585   return OMPBuilder.getOrCreateInternalVariable(
1586       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1587 }
1588 
1589 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1590                                                 const VarDecl *VD,
1591                                                 Address VDAddr,
1592                                                 SourceLocation Loc) {
1593   if (CGM.getLangOpts().OpenMPUseTLS &&
1594       CGM.getContext().getTargetInfo().isTLSSupported())
1595     return VDAddr;
1596 
1597   llvm::Type *VarTy = VDAddr.getElementType();
1598   llvm::Value *Args[] = {
1599       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1600       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1601       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1602       getOrCreateThreadPrivateCache(VD)};
1603   return Address(
1604       CGF.EmitRuntimeCall(
1605           OMPBuilder.getOrCreateRuntimeFunction(
1606               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1607           Args),
1608       CGF.Int8Ty, VDAddr.getAlignment());
1609 }
1610 
1611 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1612     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1613     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1614   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1615   // library.
1616   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1617   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1618                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1619                       OMPLoc);
1620   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1621   // to register constructor/destructor for variable.
1622   llvm::Value *Args[] = {
1623       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1624       Ctor, CopyCtor, Dtor};
1625   CGF.EmitRuntimeCall(
1626       OMPBuilder.getOrCreateRuntimeFunction(
1627           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1628       Args);
1629 }
1630 
1631 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1632     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1633     bool PerformInit, CodeGenFunction *CGF) {
1634   if (CGM.getLangOpts().OpenMPUseTLS &&
1635       CGM.getContext().getTargetInfo().isTLSSupported())
1636     return nullptr;
1637 
1638   VD = VD->getDefinition(CGM.getContext());
1639   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1640     QualType ASTTy = VD->getType();
1641 
1642     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1643     const Expr *Init = VD->getAnyInitializer();
1644     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1645       // Generate function that re-emits the declaration's initializer into the
1646       // threadprivate copy of the variable VD
1647       CodeGenFunction CtorCGF(CGM);
1648       FunctionArgList Args;
1649       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1650                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1651                             ImplicitParamKind::Other);
1652       Args.push_back(&Dst);
1653 
1654       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1655           CGM.getContext().VoidPtrTy, Args);
1656       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1657       std::string Name = getName({"__kmpc_global_ctor_", ""});
1658       llvm::Function *Fn =
1659           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1660       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1661                             Args, Loc, Loc);
1662       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1663           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1664           CGM.getContext().VoidPtrTy, Dst.getLocation());
1665       Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1666                   VDAddr.getAlignment());
1667       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1668                                /*IsInitializer=*/true);
1669       ArgVal = CtorCGF.EmitLoadOfScalar(
1670           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671           CGM.getContext().VoidPtrTy, Dst.getLocation());
1672       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1673       CtorCGF.FinishFunction();
1674       Ctor = Fn;
1675     }
1676     if (VD->getType().isDestructedType() != QualType::DK_none) {
1677       // Generate function that emits destructor call for the threadprivate copy
1678       // of the variable VD
1679       CodeGenFunction DtorCGF(CGM);
1680       FunctionArgList Args;
1681       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1682                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1683                             ImplicitParamKind::Other);
1684       Args.push_back(&Dst);
1685 
1686       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1687           CGM.getContext().VoidTy, Args);
1688       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1689       std::string Name = getName({"__kmpc_global_dtor_", ""});
1690       llvm::Function *Fn =
1691           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1692       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1693       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1694                             Loc, Loc);
1695       // Create a scope with an artificial location for the body of this function.
1696       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1697       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1698           DtorCGF.GetAddrOfLocalVar(&Dst),
1699           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1700       DtorCGF.emitDestroy(
1701           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1702           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1703           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1704       DtorCGF.FinishFunction();
1705       Dtor = Fn;
1706     }
1707     // Do not emit init function if it is not required.
1708     if (!Ctor && !Dtor)
1709       return nullptr;
1710 
1711     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1712     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1713                                                /*isVarArg=*/false)
1714                            ->getPointerTo();
1715     // Copying constructor for the threadprivate variable.
1716     // Must be NULL: the parameter is reserved by the runtime, which currently
1717     // requires it to be NULL and asserts otherwise.
1718     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1719     if (Ctor == nullptr) {
1720       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1721                                              /*isVarArg=*/false)
1722                          ->getPointerTo();
1723       Ctor = llvm::Constant::getNullValue(CtorTy);
1724     }
1725     if (Dtor == nullptr) {
1726       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1727                                              /*isVarArg=*/false)
1728                          ->getPointerTo();
1729       Dtor = llvm::Constant::getNullValue(DtorTy);
1730     }
1731     if (!CGF) {
1732       auto *InitFunctionTy =
1733           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1734       std::string Name = getName({"__omp_threadprivate_init_", ""});
1735       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1736           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1737       CodeGenFunction InitCGF(CGM);
1738       FunctionArgList ArgList;
1739       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1740                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1741                             Loc, Loc);
1742       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1743       InitCGF.FinishFunction();
1744       return InitFunction;
1745     }
1746     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1747   }
1748   return nullptr;
1749 }
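
// Schematically, for a C++ threadprivate variable that needs construction or
// destruction, the initializer produced here performs (names illustrative):
//   __kmpc_global_thread_num(&loc);
//   __kmpc_threadprivate_register(&loc, &var, <ctor-or-NULL>, NULL,
//                                 <dtor-or-NULL>);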
1750 
1751 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1752                                                 llvm::GlobalValue *GV) {
1753   std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1754       OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1755 
1756   // We only need to handle active 'indirect' declare target functions.
1757   if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1758     return;
1759 
1760   // Get a mangled name to store the new device global in.
1761   llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1762       CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1763   SmallString<128> Name;
1764   OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1765 
1766   // We need to generate a new global to hold the address of the indirectly
1767   // called device function. Doing this allows us to keep the visibility and
1768   // linkage of the associated function unchanged while allowing the runtime to
1769   // access its value.
1770   llvm::GlobalValue *Addr = GV;
1771   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1772     Addr = new llvm::GlobalVariable(
1773         CGM.getModule(), CGM.VoidPtrTy,
1774         /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1775         nullptr, llvm::GlobalValue::NotThreadLocal,
1776         CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1777     Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1778   }
1779 
1780   OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1781       Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1782       llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1783       llvm::GlobalValue::WeakODRLinkage);
1784 }
1785 
1786 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1787                                                           QualType VarType,
1788                                                           StringRef Name) {
1789   std::string Suffix = getName({"artificial", ""});
1790   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1791   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1792       VarLVType, Twine(Name).concat(Suffix).str());
1793   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1794       CGM.getTarget().isTLSSupported()) {
1795     GAddr->setThreadLocal(/*Val=*/true);
1796     return Address(GAddr, GAddr->getValueType(),
1797                    CGM.getContext().getTypeAlignInChars(VarType));
1798   }
1799   std::string CacheSuffix = getName({"cache", ""});
1800   llvm::Value *Args[] = {
1801       emitUpdateLocation(CGF, SourceLocation()),
1802       getThreadID(CGF, SourceLocation()),
1803       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1804       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1805                                 /*isSigned=*/false),
1806       OMPBuilder.getOrCreateInternalVariable(
1807           CGM.VoidPtrPtrTy,
1808           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1809   return Address(
1810       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1811           CGF.EmitRuntimeCall(
1812               OMPBuilder.getOrCreateRuntimeFunction(
1813                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1814               Args),
1815           VarLVType->getPointerTo(/*AddrSpace=*/0)),
1816       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1817 }
1818 
1819 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1820                                    const RegionCodeGenTy &ThenGen,
1821                                    const RegionCodeGenTy &ElseGen) {
1822   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1823 
1824   // If the condition constant folds and can be elided, try to avoid emitting
1825   // the condition and the dead arm of the if/else.
1826   bool CondConstant;
1827   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1828     if (CondConstant)
1829       ThenGen(CGF);
1830     else
1831       ElseGen(CGF);
1832     return;
1833   }
1834 
1835   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1836   // emit the conditional branch.
1837   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1838   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1839   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1840   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1841 
1842   // Emit the 'then' code.
1843   CGF.EmitBlock(ThenBlock);
1844   ThenGen(CGF);
1845   CGF.EmitBranch(ContBlock);
1846   // Emit the 'else' code if present.
1847   // There is no need to emit line number for unconditional branch.
1848   (void)ApplyDebugLocation::CreateEmpty(CGF);
1849   CGF.EmitBlock(ElseBlock);
1850   ElseGen(CGF);
1851   // There is no need to emit line number for unconditional branch.
1852   (void)ApplyDebugLocation::CreateEmpty(CGF);
1853   CGF.EmitBranch(ContBlock);
1854   // Emit the continuation block for code after the if.
1855   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1856 }
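
// For a condition that does not fold, the emitted control flow is:
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:          ; ThenGen body
//   br label %omp_if.end
// omp_if.else:          ; ElseGen body
//   br label %omp_if.end
// omp_if.end: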
1857 
1858 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1859                                        llvm::Function *OutlinedFn,
1860                                        ArrayRef<llvm::Value *> CapturedVars,
1861                                        const Expr *IfCond,
1862                                        llvm::Value *NumThreads) {
1863   if (!CGF.HaveInsertPoint())
1864     return;
1865   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1866   auto &M = CGM.getModule();
1867   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1868                     this](CodeGenFunction &CGF, PrePostActionTy &) {
1869     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1870     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1871     llvm::Value *Args[] = {
1872         RTLoc,
1873         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1874         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1875     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1876     RealArgs.append(std::begin(Args), std::end(Args));
1877     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1878 
1879     llvm::FunctionCallee RTLFn =
1880         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1881     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1882   };
1883   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1884                     this](CodeGenFunction &CGF, PrePostActionTy &) {
1885     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1886     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1887     // Build calls:
1888     // __kmpc_serialized_parallel(&Loc, GTid);
1889     llvm::Value *Args[] = {RTLoc, ThreadID};
1890     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1891                             M, OMPRTL___kmpc_serialized_parallel),
1892                         Args);
1893 
1894     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1895     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1896     Address ZeroAddrBound =
1897         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1898                                          /*Name=*/".bound.zero.addr");
1899     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1900     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1901     // ThreadId for serialized parallels is 0.
1902     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1903     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1904     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1905 
1906     // Ensure we do not inline the function. This is trivially true for the ones
1907     // passed to __kmpc_fork_call but the ones called in serialized regions
1908     // could be inlined. This is not perfect, but it is closer to the invariant
1909     // we want, namely, every data environment starts with a new function.
1910     // TODO: We should pass the if condition to the runtime function and do the
1911     //       handling there. Much cleaner code.
1912     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1913     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1914     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1915 
1916     // __kmpc_end_serialized_parallel(&Loc, GTid);
1917     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1918     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1919                             M, OMPRTL___kmpc_end_serialized_parallel),
1920                         EndArgs);
1921   };
1922   if (IfCond) {
1923     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1924   } else {
1925     RegionCodeGenTy ThenRCG(ThenGen);
1926     ThenRCG(CGF);
1927   }
1928 }
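
// Schematically (exact IR types are target dependent), the two arms emit:
//   then: call void @__kmpc_fork_call(ptr @loc, i32 <n>, ptr @outlined,
//                                     <captured vars>...)
//   else: call void @__kmpc_serialized_parallel(ptr @loc, i32 %gtid)
//         call void @outlined(ptr %gtid.addr, ptr %.bound.zero.addr, ...)
//         call void @__kmpc_end_serialized_parallel(ptr @loc, i32 %gtid)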
1929 
1930 // If we're inside an (outlined) parallel region, use the region info's
1931 // thread-ID variable (it is passed as the first argument of the outlined
1932 // function, as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
1933 // code region, get the thread ID by calling kmp_int32
1934 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1935 // temporary, and return the address of that temporary.
1936 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1937                                              SourceLocation Loc) {
1938   if (auto *OMPRegionInfo =
1939           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1940     if (OMPRegionInfo->getThreadIDVariable())
1941       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1942 
1943   llvm::Value *ThreadID = getThreadID(CGF, Loc);
1944   QualType Int32Ty =
1945       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1946   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1947   CGF.EmitStoreOfScalar(ThreadID,
1948                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1949 
1950   return ThreadIDTemp;
1951 }
1952 
1953 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1954   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1955   std::string Name = getName({Prefix, "var"});
1956   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1957 }
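
// E.g., '#pragma omp critical (foo)' yields an internal kmp_critical_name
// global named roughly ".gomp_critical_user_foo.var" (exact separators depend
// on getName()), shared by every critical region that uses that name.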
1958 
1959 namespace {
1960 /// Common pre(post)-action for different OpenMP constructs.
1961 class CommonActionTy final : public PrePostActionTy {
1962   llvm::FunctionCallee EnterCallee;
1963   ArrayRef<llvm::Value *> EnterArgs;
1964   llvm::FunctionCallee ExitCallee;
1965   ArrayRef<llvm::Value *> ExitArgs;
1966   bool Conditional;
1967   llvm::BasicBlock *ContBlock = nullptr;
1968 
1969 public:
1970   CommonActionTy(llvm::FunctionCallee EnterCallee,
1971                  ArrayRef<llvm::Value *> EnterArgs,
1972                  llvm::FunctionCallee ExitCallee,
1973                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1974       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1975         ExitArgs(ExitArgs), Conditional(Conditional) {}
1976   void Enter(CodeGenFunction &CGF) override {
1977     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1978     if (Conditional) {
1979       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1980       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1981       ContBlock = CGF.createBasicBlock("omp_if.end");
1982       // Generate the branch (If-stmt)
1983       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1984       CGF.EmitBlock(ThenBlock);
1985     }
1986   }
1987   void Done(CodeGenFunction &CGF) {
1988     // Emit the rest of blocks/branches
1989     CGF.EmitBranch(ContBlock);
1990     CGF.EmitBlock(ContBlock, true);
1991   }
1992   void Exit(CodeGenFunction &CGF) override {
1993     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1994   }
1995 };
1996 } // anonymous namespace
1997 
1998 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1999                                          StringRef CriticalName,
2000                                          const RegionCodeGenTy &CriticalOpGen,
2001                                          SourceLocation Loc, const Expr *Hint) {
2002   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2003   // CriticalOpGen();
2004   // __kmpc_end_critical(ident_t *, gtid, Lock);
2005   // Prepare arguments and build a call to __kmpc_critical
2006   if (!CGF.HaveInsertPoint())
2007     return;
2008   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2009                          getCriticalRegionLock(CriticalName)};
2010   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2011                                                 std::end(Args));
2012   if (Hint) {
2013     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2014         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2015   }
2016   CommonActionTy Action(
2017       OMPBuilder.getOrCreateRuntimeFunction(
2018           CGM.getModule(),
2019           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2020       EnterArgs,
2021       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2022                                             OMPRTL___kmpc_end_critical),
2023       Args);
2024   CriticalOpGen.setAction(Action);
2025   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2026 }
2027 
2028 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2029                                        const RegionCodeGenTy &MasterOpGen,
2030                                        SourceLocation Loc) {
2031   if (!CGF.HaveInsertPoint())
2032     return;
2033   // if(__kmpc_master(ident_t *, gtid)) {
2034   //   MasterOpGen();
2035   //   __kmpc_end_master(ident_t *, gtid);
2036   // }
2037   // Prepare arguments and build a call to __kmpc_master
2038   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2039   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2040                             CGM.getModule(), OMPRTL___kmpc_master),
2041                         Args,
2042                         OMPBuilder.getOrCreateRuntimeFunction(
2043                             CGM.getModule(), OMPRTL___kmpc_end_master),
2044                         Args,
2045                         /*Conditional=*/true);
2046   MasterOpGen.setAction(Action);
2047   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2048   Action.Done(CGF);
2049 }
2050 
2051 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2052                                        const RegionCodeGenTy &MaskedOpGen,
2053                                        SourceLocation Loc, const Expr *Filter) {
2054   if (!CGF.HaveInsertPoint())
2055     return;
2056   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2057   //   MaskedOpGen();
2058   //   __kmpc_end_masked(ident_t *, gtid);
2059   // }
2060   // Prepare arguments and build a call to __kmpc_masked
2061   llvm::Value *FilterVal = Filter
2062                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2063                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2064   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2065                          FilterVal};
2066   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2067                             getThreadID(CGF, Loc)};
2068   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2069                             CGM.getModule(), OMPRTL___kmpc_masked),
2070                         Args,
2071                         OMPBuilder.getOrCreateRuntimeFunction(
2072                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2073                         ArgsEnd,
2074                         /*Conditional=*/true);
2075   MaskedOpGen.setAction(Action);
2076   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2077   Action.Done(CGF);
2078 }
2079 
2080 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2081                                         SourceLocation Loc) {
2082   if (!CGF.HaveInsertPoint())
2083     return;
2084   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2085     OMPBuilder.createTaskyield(CGF.Builder);
2086   } else {
2087     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2088     llvm::Value *Args[] = {
2089         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2090         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2091     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2092                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2093                         Args);
2094   }
2095 
2096   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2097     Region->emitUntiedSwitch(CGF);
2098 }
2099 
2100 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2101                                           const RegionCodeGenTy &TaskgroupOpGen,
2102                                           SourceLocation Loc) {
2103   if (!CGF.HaveInsertPoint())
2104     return;
2105   // __kmpc_taskgroup(ident_t *, gtid);
2106   // TaskgroupOpGen();
2107   // __kmpc_end_taskgroup(ident_t *, gtid);
2108   // Prepare arguments and build a call to __kmpc_taskgroup
2109   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2110   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2111                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2112                         Args,
2113                         OMPBuilder.getOrCreateRuntimeFunction(
2114                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2115                         Args);
2116   TaskgroupOpGen.setAction(Action);
2117   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2118 }
2119 
2120 /// Given an array of pointers to variables, project the address of a
2121 /// given variable.
2122 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2123                                       unsigned Index, const VarDecl *Var) {
2124   // Pull out the pointer to the variable.
2125   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2126   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2127 
2128   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2129   return Address(
2130       CGF.Builder.CreateBitCast(
2131           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2132       ElemTy, CGF.getContext().getDeclAlign(Var));
2133 }
2134 
2135 static llvm::Value *emitCopyprivateCopyFunction(
2136     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2137     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2138     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2139     SourceLocation Loc) {
2140   ASTContext &C = CGM.getContext();
2141   // void copy_func(void *LHSArg, void *RHSArg);
2142   FunctionArgList Args;
2143   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2144                            ImplicitParamKind::Other);
2145   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2146                            ImplicitParamKind::Other);
2147   Args.push_back(&LHSArg);
2148   Args.push_back(&RHSArg);
2149   const auto &CGFI =
2150       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2151   std::string Name =
2152       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2153   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2154                                     llvm::GlobalValue::InternalLinkage, Name,
2155                                     &CGM.getModule());
2156   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2157   Fn->setDoesNotRecurse();
2158   CodeGenFunction CGF(CGM);
2159   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2160   // Dest = (void*[n])(LHSArg);
2161   // Src = (void*[n])(RHSArg);
2162   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2163                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2164                   ArgsElemType->getPointerTo()),
2165               ArgsElemType, CGF.getPointerAlign());
2166   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2167                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2168                   ArgsElemType->getPointerTo()),
2169               ArgsElemType, CGF.getPointerAlign());
2170   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2171   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2172   // ...
2173   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2174   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2175     const auto *DestVar =
2176         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2177     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2178 
2179     const auto *SrcVar =
2180         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2181     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2182 
2183     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2184     QualType Type = VD->getType();
2185     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2186   }
2187   CGF.FinishFunction();
2188   return Fn;
2189 }
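
// In C-like pseudocode the generated helper is roughly:
//   void omp.copyprivate.copy_func(void *LHS, void *RHS) {
//     void **Dst = (void **)LHS, **Src = (void **)RHS;
//     *(T0 *)Dst[0] = *(T0 *)Src[0];   // via AssignmentOps[0]
//     ...
//     *(Tn *)Dst[n] = *(Tn *)Src[n];   // via AssignmentOps[n]
//   }
// with the name and the element types illustrative.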
2190 
2191 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2192                                        const RegionCodeGenTy &SingleOpGen,
2193                                        SourceLocation Loc,
2194                                        ArrayRef<const Expr *> CopyprivateVars,
2195                                        ArrayRef<const Expr *> SrcExprs,
2196                                        ArrayRef<const Expr *> DstExprs,
2197                                        ArrayRef<const Expr *> AssignmentOps) {
2198   if (!CGF.HaveInsertPoint())
2199     return;
2200   assert(CopyprivateVars.size() == SrcExprs.size() &&
2201          CopyprivateVars.size() == DstExprs.size() &&
2202          CopyprivateVars.size() == AssignmentOps.size());
2203   ASTContext &C = CGM.getContext();
2204   // int32 did_it = 0;
2205   // if(__kmpc_single(ident_t *, gtid)) {
2206   //   SingleOpGen();
2207   //   __kmpc_end_single(ident_t *, gtid);
2208   //   did_it = 1;
2209   // }
2210   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2211   // <copy_func>, did_it);
2212 
2213   Address DidIt = Address::invalid();
2214   if (!CopyprivateVars.empty()) {
2215     // int32 did_it = 0;
2216     QualType KmpInt32Ty =
2217         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2218     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2219     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2220   }
2221   // Prepare arguments and build a call to __kmpc_single
2222   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2223   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2224                             CGM.getModule(), OMPRTL___kmpc_single),
2225                         Args,
2226                         OMPBuilder.getOrCreateRuntimeFunction(
2227                             CGM.getModule(), OMPRTL___kmpc_end_single),
2228                         Args,
2229                         /*Conditional=*/true);
2230   SingleOpGen.setAction(Action);
2231   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2232   if (DidIt.isValid()) {
2233     // did_it = 1;
2234     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2235   }
2236   Action.Done(CGF);
2237   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2238   // <copy_func>, did_it);
2239   if (DidIt.isValid()) {
2240     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2241     QualType CopyprivateArrayTy = C.getConstantArrayType(
2242         C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2243         /*IndexTypeQuals=*/0);
2244     // Create a list of all private variables for copyprivate.
2245     Address CopyprivateList =
2246         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2247     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2248       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2249       CGF.Builder.CreateStore(
2250           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2251               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2252               CGF.VoidPtrTy),
2253           Elem);
2254     }
2255     // Build a function that copies private values from the single region to
2256     // all other threads in the corresponding parallel region.
2257     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2258         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2259         SrcExprs, DstExprs, AssignmentOps, Loc);
2260     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2261     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2262         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2263     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2264     llvm::Value *Args[] = {
2265         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2266         getThreadID(CGF, Loc),        // i32 <gtid>
2267         BufSize,                      // size_t <buf_size>
2268         CL.getPointer(),              // void *<copyprivate list>
2269         CpyFn,                        // void (*) (void *, void *) <copy_func>
2270         DidItVal                      // i32 did_it
2271     };
2272     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2273                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2274                         Args);
2275   }
2276 }
2277 
2278 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2279                                         const RegionCodeGenTy &OrderedOpGen,
2280                                         SourceLocation Loc, bool IsThreads) {
2281   if (!CGF.HaveInsertPoint())
2282     return;
2283   // __kmpc_ordered(ident_t *, gtid);
2284   // OrderedOpGen();
2285   // __kmpc_end_ordered(ident_t *, gtid);
2286   // Prepare arguments and build a call to __kmpc_ordered
2287   if (IsThreads) {
2288     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2289     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2290                               CGM.getModule(), OMPRTL___kmpc_ordered),
2291                           Args,
2292                           OMPBuilder.getOrCreateRuntimeFunction(
2293                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2294                           Args);
2295     OrderedOpGen.setAction(Action);
2296     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2297     return;
2298   }
2299   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2300 }
2301 
2302 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2303   unsigned Flags;
2304   if (Kind == OMPD_for)
2305     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2306   else if (Kind == OMPD_sections)
2307     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2308   else if (Kind == OMPD_single)
2309     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2310   else if (Kind == OMPD_barrier)
2311     Flags = OMP_IDENT_BARRIER_EXPL;
2312   else
2313     Flags = OMP_IDENT_BARRIER_IMPL;
2314   return Flags;
2315 }
2316 
2317 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2318     CodeGenFunction &CGF, const OMPLoopDirective &S,
2319     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2320   // Check if the loop directive is actually a doacross loop directive. In that
2321   // case choose the static schedule with a chunk size of 1.
2322   if (llvm::any_of(
2323           S.getClausesOfKind<OMPOrderedClause>(),
2324           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2325     ScheduleKind = OMPC_SCHEDULE_static;
2326     // Chunk size is 1 in this case.
2327     llvm::APInt ChunkSize(32, 1);
2328     ChunkExpr = IntegerLiteral::Create(
2329         CGF.getContext(), ChunkSize,
2330         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2331         SourceLocation());
2332   }
2333 }
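
// E.g., '#pragma omp for ordered(2)' is a doacross loop, so it is handled as
// if 'schedule(static, 1)' had been written: each iteration forms its own
// chunk, which lets cross-iteration dependences be satisfied in order.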
2334 
2335 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2336                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2337                                       bool ForceSimpleCall) {
2338   // Check if we should use the OMPBuilder
2339   auto *OMPRegionInfo =
2340       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2341   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2342     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2343         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2344     return;
2345   }
2346 
2347   if (!CGF.HaveInsertPoint())
2348     return;
2351   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2352   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2353   // thread_id);
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2355                          getThreadID(CGF, Loc)};
2356   if (OMPRegionInfo) {
2357     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2358       llvm::Value *Result = CGF.EmitRuntimeCall(
2359           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2360                                                 OMPRTL___kmpc_cancel_barrier),
2361           Args);
2362       if (EmitChecks) {
2363         // if (__kmpc_cancel_barrier()) {
2364         //   exit from construct;
2365         // }
2366         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2367         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2368         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2369         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2370         CGF.EmitBlock(ExitBB);
2371         //   exit from construct;
2372         CodeGenFunction::JumpDest CancelDestination =
2373             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2374         CGF.EmitBranchThroughCleanup(CancelDestination);
2375         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2376       }
2377       return;
2378     }
2379   }
2380   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2381                           CGM.getModule(), OMPRTL___kmpc_barrier),
2382                       Args);
2383 }
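
// In a cancellable region with checks enabled this emits, schematically:
//   %res = call i32 @__kmpc_cancel_barrier(ptr @loc, i32 %gtid)
//   br i1 <%res != 0>, label %.cancel.exit, label %.cancel.continue
// and otherwise a plain call to @__kmpc_barrier(ptr @loc, i32 %gtid).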
2384 
2385 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2386                                     Expr *ME, bool IsFatal) {
2387   llvm::Value *MVL =
2388       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2389          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2390   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2391   // *message)
2392   llvm::Value *Args[] = {
2393       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
2394       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2395       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2396   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2397                           CGM.getModule(), OMPRTL___kmpc_error),
2398                       Args);
2399 }
2400 
2401 /// Map the OpenMP loop schedule to the runtime enumeration.
2402 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2403                                           bool Chunked, bool Ordered) {
2404   switch (ScheduleKind) {
2405   case OMPC_SCHEDULE_static:
2406     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2407                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2408   case OMPC_SCHEDULE_dynamic:
2409     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2410   case OMPC_SCHEDULE_guided:
2411     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2412   case OMPC_SCHEDULE_runtime:
2413     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2414   case OMPC_SCHEDULE_auto:
2415     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2416   case OMPC_SCHEDULE_unknown:
2417     assert(!Chunked && "chunk was specified but schedule kind not known");
2418     return Ordered ? OMP_ord_static : OMP_sch_static;
2419   }
2420   llvm_unreachable("Unexpected runtime schedule");
2421 }
2422 
2423 /// Map the OpenMP distribute schedule to the runtime enumeration.
2424 static OpenMPSchedType
2425 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2426   // Only static is allowed for dist_schedule.
2427   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2428 }
2429 
2430 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2431                                          bool Chunked) const {
2432   OpenMPSchedType Schedule =
2433       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2434   return Schedule == OMP_sch_static;
2435 }
2436 
2437 bool CGOpenMPRuntime::isStaticNonchunked(
2438     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2439   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2440   return Schedule == OMP_dist_sch_static;
2441 }
2442 
2443 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2444                                       bool Chunked) const {
2445   OpenMPSchedType Schedule =
2446       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2447   return Schedule == OMP_sch_static_chunked;
2448 }
2449 
2450 bool CGOpenMPRuntime::isStaticChunked(
2451     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2452   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2453   return Schedule == OMP_dist_sch_static_chunked;
2454 }
2455 
2456 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2457   OpenMPSchedType Schedule =
2458       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2459   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2460   return Schedule != OMP_sch_static;
2461 }
2462 
2463 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2464                                   OpenMPScheduleClauseModifier M1,
2465                                   OpenMPScheduleClauseModifier M2) {
2466   int Modifier = 0;
2467   switch (M1) {
2468   case OMPC_SCHEDULE_MODIFIER_monotonic:
2469     Modifier = OMP_sch_modifier_monotonic;
2470     break;
2471   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2472     Modifier = OMP_sch_modifier_nonmonotonic;
2473     break;
2474   case OMPC_SCHEDULE_MODIFIER_simd:
2475     if (Schedule == OMP_sch_static_chunked)
2476       Schedule = OMP_sch_static_balanced_chunked;
2477     break;
2478   case OMPC_SCHEDULE_MODIFIER_last:
2479   case OMPC_SCHEDULE_MODIFIER_unknown:
2480     break;
2481   }
2482   switch (M2) {
2483   case OMPC_SCHEDULE_MODIFIER_monotonic:
2484     Modifier = OMP_sch_modifier_monotonic;
2485     break;
2486   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2487     Modifier = OMP_sch_modifier_nonmonotonic;
2488     break;
2489   case OMPC_SCHEDULE_MODIFIER_simd:
2490     if (Schedule == OMP_sch_static_chunked)
2491       Schedule = OMP_sch_static_balanced_chunked;
2492     break;
2493   case OMPC_SCHEDULE_MODIFIER_last:
2494   case OMPC_SCHEDULE_MODIFIER_unknown:
2495     break;
2496   }
2497   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2498   // If the static schedule kind is specified or if the ordered clause is
2499   // specified, and if the nonmonotonic modifier is not specified, the effect is
2500   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2501   // modifier is specified, the effect is as if the nonmonotonic modifier is
2502   // specified.
2503   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2504     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2505           Schedule == OMP_sch_static_balanced_chunked ||
2506           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2507           Schedule == OMP_dist_sch_static_chunked ||
2508           Schedule == OMP_dist_sch_static))
2509       Modifier = OMP_sch_modifier_nonmonotonic;
2510   }
2511   return Schedule | Modifier;
2512 }
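// Worked example for OpenMP >= 5.0: 'schedule(dynamic)' carries no explicit
// modifier, so Modifier stays 0; since OMP_sch_dynamic_chunked is not one of
// the static kinds listed above, the result is
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic. Static (and
// ordered-static) schedules keep Modifier == 0, i.e. monotonic behavior.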
2513 
2514 void CGOpenMPRuntime::emitForDispatchInit(
2515     CodeGenFunction &CGF, SourceLocation Loc,
2516     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2517     bool Ordered, const DispatchRTInput &DispatchValues) {
2518   if (!CGF.HaveInsertPoint())
2519     return;
2520   OpenMPSchedType Schedule = getRuntimeSchedule(
2521       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2522   assert(Ordered ||
2523          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2524           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2525           Schedule != OMP_sch_static_balanced_chunked));
2526   // Call __kmpc_dispatch_init(
2527   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2528   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2529   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2530 
2531   // If the Chunk was not specified in the clause, use the default value 1.
2532   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2533                                             : CGF.Builder.getIntN(IVSize, 1);
2534   llvm::Value *Args[] = {
2535       emitUpdateLocation(CGF, Loc),
2536       getThreadID(CGF, Loc),
2537       CGF.Builder.getInt32(addMonoNonMonoModifier(
2538           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2539       DispatchValues.LB,                                     // Lower
2540       DispatchValues.UB,                                     // Upper
2541       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2542       Chunk                                                  // Chunk
2543   };
2544   CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2545                       Args);
2546 }
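// Sketch of the emitted call for a 32-bit signed induction variable:
//
//   call void @__kmpc_dispatch_init_4(ptr %loc, i32 %gtid, i32 %sched,
//                                     i32 %lb, i32 %ub, i32 1, i32 %chunk)
//
// createDispatchInitFunction picks the _4/_4u/_8/_8u variant from IVSize and
// IVSigned; %sched carries the monotonic/nonmonotonic bits computed above.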
2547 
2548 static void emitForStaticInitCall(
2549     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2550     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2551     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2552     const CGOpenMPRuntime::StaticRTInput &Values) {
2553   if (!CGF.HaveInsertPoint())
2554     return;
2555 
2556   assert(!Values.Ordered && "Ordered loops use dynamic dispatch init, not static init.");
2557   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2558          Schedule == OMP_sch_static_balanced_chunked ||
2559          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2560          Schedule == OMP_dist_sch_static ||
2561          Schedule == OMP_dist_sch_static_chunked);
2562 
2563   // Call __kmpc_for_static_init(
2564   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2565   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2566   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2567   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2568   llvm::Value *Chunk = Values.Chunk;
2569   if (Chunk == nullptr) {
2570     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2571             Schedule == OMP_dist_sch_static) &&
2572            "expected static non-chunked schedule");
2573     // If the Chunk was not specified in the clause, use the default value 1.
2574     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2575   } else {
2576     assert((Schedule == OMP_sch_static_chunked ||
2577             Schedule == OMP_sch_static_balanced_chunked ||
2578             Schedule == OMP_ord_static_chunked ||
2579             Schedule == OMP_dist_sch_static_chunked) &&
2580            "expected static chunked schedule");
2581   }
2582   llvm::Value *Args[] = {
2583       UpdateLocation,
2584       ThreadId,
2585       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2586                                                   M2)), // Schedule type
2587       Values.IL.getPointer(),                           // &isLastIter
2588       Values.LB.getPointer(),                           // &LB
2589       Values.UB.getPointer(),                           // &UB
2590       Values.ST.getPointer(),                           // &Stride
2591       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2592       Chunk                                             // Chunk
2593   };
2594   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2595 }
2596 
2597 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2598                                         SourceLocation Loc,
2599                                         OpenMPDirectiveKind DKind,
2600                                         const OpenMPScheduleTy &ScheduleKind,
2601                                         const StaticRTInput &Values) {
2602   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2603       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2604   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2605          "Expected loop-based or sections-based directive.");
2606   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2607                                              isOpenMPLoopDirective(DKind)
2608                                                  ? OMP_IDENT_WORK_LOOP
2609                                                  : OMP_IDENT_WORK_SECTIONS);
2610   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2611   llvm::FunctionCallee StaticInitFunction =
2612       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2613                                              /*IsGPUDistribute=*/false);
2614   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2615   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2616                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2617 }
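// E.g. '#pragma omp for schedule(static)' with a 32-bit signed IV emits a
// __kmpc_for_static_init_4 call with schedtype OMP_sch_static and chunk 1;
// the loop codegen then reads the lastiter/lower/upper/stride outparams back
// to form its bounds.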
2618 
2619 void CGOpenMPRuntime::emitDistributeStaticInit(
2620     CodeGenFunction &CGF, SourceLocation Loc,
2621     OpenMPDistScheduleClauseKind SchedKind,
2622     const CGOpenMPRuntime::StaticRTInput &Values) {
2623   OpenMPSchedType ScheduleNum =
2624       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2625   llvm::Value *UpdatedLocation =
2626       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2627   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2628   llvm::FunctionCallee StaticInitFunction;
2629   bool isGPUDistribute =
2630       CGM.getLangOpts().OpenMPIsTargetDevice &&
2631       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2632   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2633       Values.IVSize, Values.IVSigned, isGPUDistribute);
2634 
2635   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2636                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2637                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2638 }
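// Note: for device code on AMDGCN/NVPTX this selects the
// __kmpc_distribute_static_init_(4|4u|8|8u) entry points; host (and other
// device) compilations fall back to the generic __kmpc_for_static_init_*
// functions.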
2639 
2640 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2641                                           SourceLocation Loc,
2642                                           OpenMPDirectiveKind DKind) {
2643   if (!CGF.HaveInsertPoint())
2644     return;
2645   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2646   llvm::Value *Args[] = {
2647       emitUpdateLocation(CGF, Loc,
2648                          isOpenMPDistributeDirective(DKind)
2649                              ? OMP_IDENT_WORK_DISTRIBUTE
2650                              : isOpenMPLoopDirective(DKind)
2651                                    ? OMP_IDENT_WORK_LOOP
2652                                    : OMP_IDENT_WORK_SECTIONS),
2653       getThreadID(CGF, Loc)};
2654   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2655   if (isOpenMPDistributeDirective(DKind) &&
2656       CGM.getLangOpts().OpenMPIsTargetDevice &&
2657       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2658     CGF.EmitRuntimeCall(
2659         OMPBuilder.getOrCreateRuntimeFunction(
2660             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2661         Args);
2662   else
2663     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2664                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2665                         Args);
2666 }
2667 
2668 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2669                                                  SourceLocation Loc,
2670                                                  unsigned IVSize,
2671                                                  bool IVSigned) {
2672   if (!CGF.HaveInsertPoint())
2673     return;
2674   // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2675   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2676   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2677                       Args);
2678 }
2679 
2680 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2681                                           SourceLocation Loc, unsigned IVSize,
2682                                           bool IVSigned, Address IL,
2683                                           Address LB, Address UB,
2684                                           Address ST) {
2685   // Call __kmpc_dispatch_next(
2686   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2687   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2688   //          kmp_int[32|64] *p_stride);
2689   llvm::Value *Args[] = {
2690       emitUpdateLocation(CGF, Loc),
2691       getThreadID(CGF, Loc),
2692       IL.getPointer(), // &isLastIter
2693       LB.getPointer(), // &Lower
2694       UB.getPointer(), // &Upper
2695       ST.getPointer()  // &Stride
2696   };
2697   llvm::Value *Call = CGF.EmitRuntimeCall(
2698       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2699   return CGF.EmitScalarConversion(
2700       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2701       CGF.getContext().BoolTy, Loc);
2702 }
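// Conceptually, the caller wraps this in the dispatch loop (a sketch for a
// 32-bit signed IV):
//
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st) != 0) {
//     for (iv = lb; iv <= ub; iv += st) { /* loop body */ }
//   }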
2703 
2704 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2705                                            llvm::Value *NumThreads,
2706                                            SourceLocation Loc) {
2707   if (!CGF.HaveInsertPoint())
2708     return;
2709   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2710   llvm::Value *Args[] = {
2711       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2712       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2713   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2714                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2715                       Args);
2716 }
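// E.g. 'num_threads(4)' on a parallel directive becomes
//   call void @__kmpc_push_num_threads(ptr %loc, i32 %gtid, i32 4)
// emitted ahead of the runtime call that forks the parallel region.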
2717 
2718 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2719                                          ProcBindKind ProcBind,
2720                                          SourceLocation Loc) {
2721   if (!CGF.HaveInsertPoint())
2722     return;
2723   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2724   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2725   llvm::Value *Args[] = {
2726       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2727       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2728   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2729                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2730                       Args);
2731 }
2732 
2733 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2734                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2735   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2736     OMPBuilder.createFlush(CGF.Builder);
2737   } else {
2738     if (!CGF.HaveInsertPoint())
2739       return;
2740     // Build call void __kmpc_flush(ident_t *loc)
2741     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2742                             CGM.getModule(), OMPRTL___kmpc_flush),
2743                         emitUpdateLocation(CGF, Loc));
2744   }
2745 }
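// E.g. '#pragma omp flush' becomes 'call void @__kmpc_flush(ptr %loc)'; the
// OpenMPIRBuilder path above builds the equivalent call via createFlush.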
2746 
2747 namespace {
2748 /// Indexes of fields for type kmp_task_t.
2749 enum KmpTaskTFields {
2750   /// List of shared variables.
2751   KmpTaskTShareds,
2752   /// Task routine.
2753   KmpTaskTRoutine,
2754   /// Partition id for the untied tasks.
2755   KmpTaskTPartId,
2756   /// Function with call of destructors for private variables.
2757   Data1,
2758   /// Task priority.
2759   Data2,
2760   /// (Taskloops only) Lower bound.
2761   KmpTaskTLowerBound,
2762   /// (Taskloops only) Upper bound.
2763   KmpTaskTUpperBound,
2764   /// (Taskloops only) Stride.
2765   KmpTaskTStride,
2766   /// (Taskloops only) Is last iteration flag.
2767   KmpTaskTLastIter,
2768   /// (Taskloops only) Reduction data.
2769   KmpTaskTReductions,
2770 };
2771 } // anonymous namespace
2772 
2773 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2774   // If we are in simd mode or there are no entries, we don't need to do
2775   // anything.
2776   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2777     return;
2778 
2779   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2780       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2781              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2782     SourceLocation Loc;
2783     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2784       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2785                 E = CGM.getContext().getSourceManager().fileinfo_end();
2786            I != E; ++I) {
2787         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2788             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2789           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2790               I->getFirst(), EntryInfo.Line, 1);
2791           break;
2792         }
2793       }
2794     }
2795     switch (Kind) {
2796     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2797       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2798           DiagnosticsEngine::Error, "Offloading entry for target region in "
2799                                     "%0 is incorrect: either the "
2800                                     "address or the ID is invalid.");
2801       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2802     } break;
2803     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2804       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2805           DiagnosticsEngine::Error, "Offloading entry for declare target "
2806                                     "variable %0 is incorrect: the "
2807                                     "address is invalid.");
2808       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2809     } break;
2810     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2811       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2812           DiagnosticsEngine::Error,
2813           "Offloading entry for declare target variable is incorrect: the "
2814           "address is invalid.");
2815       CGM.getDiags().Report(DiagID);
2816     } break;
2817     }
2818   };
2819 
2820   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2821 }
2822 
2823 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2824   if (!KmpRoutineEntryPtrTy) {
2825     // Build the typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2826     ASTContext &C = CGM.getContext();
2827     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2828     FunctionProtoType::ExtProtoInfo EPI;
2829     KmpRoutineEntryPtrQTy = C.getPointerType(
2830         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2831     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2832   }
2833 }
2834 
2835 namespace {
2836 struct PrivateHelpersTy {
2837   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2838                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2839       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2840         PrivateElemInit(PrivateElemInit) {}
2841   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2842   const Expr *OriginalRef = nullptr;
2843   const VarDecl *Original = nullptr;
2844   const VarDecl *PrivateCopy = nullptr;
2845   const VarDecl *PrivateElemInit = nullptr;
2846   bool isLocalPrivate() const {
2847     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2848   }
2849 };
2850 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2851 } // anonymous namespace
2852 
2853 static bool isAllocatableDecl(const VarDecl *VD) {
2854   const VarDecl *CVD = VD->getCanonicalDecl();
2855   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2856     return false;
2857   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2858   // The default allocator without an allocator expression means ordinary allocation.
2859   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2860            !AA->getAllocator());
2861 }
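// E.g. a variable under '#pragma omp allocate(x) allocator(omp_pteam_mem_alloc)'
// is allocatable here, whereas plain '#pragma omp allocate(x)' (default
// allocator, no allocator expression) uses the default allocation and is not.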
2862 
2863 static RecordDecl *
2864 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2865   if (!Privates.empty()) {
2866     ASTContext &C = CGM.getContext();
2867     // Build struct .kmp_privates_t. {
2868     //         /*  private vars  */
2869     //       };
2870     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2871     RD->startDefinition();
2872     for (const auto &Pair : Privates) {
2873       const VarDecl *VD = Pair.second.Original;
2874       QualType Type = VD->getType().getNonReferenceType();
2875       // If the private variable is a local variable with lvalue ref type,
2876       // allocate the pointer instead of the pointee type.
2877       if (Pair.second.isLocalPrivate()) {
2878         if (VD->getType()->isLValueReferenceType())
2879           Type = C.getPointerType(Type);
2880         if (isAllocatableDecl(VD))
2881           Type = C.getPointerType(Type);
2882       }
2883       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2884       if (VD->hasAttrs()) {
2885         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2886              E(VD->getAttrs().end());
2887              I != E; ++I)
2888           FD->addAttr(*I);
2889       }
2890     }
2891     RD->completeDefinition();
2892     return RD;
2893   }
2894   return nullptr;
2895 }
2896 
2897 static RecordDecl *
2898 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2899                          QualType KmpInt32Ty,
2900                          QualType KmpRoutineEntryPointerQTy) {
2901   ASTContext &C = CGM.getContext();
2902   // Build struct kmp_task_t {
2903   //         void *              shareds;
2904   //         kmp_routine_entry_t routine;
2905   //         kmp_int32           part_id;
2906   //         kmp_cmplrdata_t data1;
2907   //         kmp_cmplrdata_t data2;
2908   // For taskloops additional fields:
2909   //         kmp_uint64          lb;
2910   //         kmp_uint64          ub;
2911   //         kmp_int64           st;
2912   //         kmp_int32           liter;
2913   //         void *              reductions;
2914   //       };
2915   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2916   UD->startDefinition();
2917   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2918   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2919   UD->completeDefinition();
2920   QualType KmpCmplrdataTy = C.getRecordType(UD);
2921   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2922   RD->startDefinition();
2923   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2924   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2925   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2926   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2927   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2928   if (isOpenMPTaskLoopDirective(Kind)) {
2929     QualType KmpUInt64Ty =
2930         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2931     QualType KmpInt64Ty =
2932         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2933     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2934     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2935     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2936     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2937     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2938   }
2939   RD->completeDefinition();
2940   return RD;
2941 }
2942 
2943 static RecordDecl *
2944 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2945                                      ArrayRef<PrivateDataTy> Privates) {
2946   ASTContext &C = CGM.getContext();
2947   // Build struct kmp_task_t_with_privates {
2948   //         kmp_task_t task_data;
2949   //         .kmp_privates_t. privates;
2950   //       };
2951   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2952   RD->startDefinition();
2953   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2954   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2955     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2956   RD->completeDefinition();
2957   return RD;
2958 }
2959 
2960 /// Emit a proxy function which accepts kmp_task_t as the second
2961 /// argument.
2962 /// \code
2963 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2964 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2965 ///   For taskloops:
2966 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2967 ///   tt->reductions, tt->shareds);
2968 ///   return 0;
2969 /// }
2970 /// \endcode
2971 static llvm::Function *
2972 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2973                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2974                       QualType KmpTaskTWithPrivatesPtrQTy,
2975                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2976                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
2977                       llvm::Value *TaskPrivatesMap) {
2978   ASTContext &C = CGM.getContext();
2979   FunctionArgList Args;
2980   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2981                             ImplicitParamKind::Other);
2982   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2983                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2984                                 ImplicitParamKind::Other);
2985   Args.push_back(&GtidArg);
2986   Args.push_back(&TaskTypeArg);
2987   const auto &TaskEntryFnInfo =
2988       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2989   llvm::FunctionType *TaskEntryTy =
2990       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2991   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
2992   auto *TaskEntry = llvm::Function::Create(
2993       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
2994   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
2995   TaskEntry->setDoesNotRecurse();
2996   CodeGenFunction CGF(CGM);
2997   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
2998                     Loc, Loc);
2999 
3000   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3001   // tt,
3002   // For taskloops:
3003   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3004   // tt->task_data.shareds);
3005   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3006       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3007   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3008       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3009       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3010   const auto *KmpTaskTWithPrivatesQTyRD =
3011       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3012   LValue Base =
3013       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3014   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3015   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3016   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3017   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3018 
3019   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3020   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3021   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3022       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3023       CGF.ConvertTypeForMem(SharedsPtrTy));
3024 
3025   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3026   llvm::Value *PrivatesParam;
3027   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3028     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3029     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3030         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3031   } else {
3032     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3033   }
3034 
3035   llvm::Value *CommonArgs[] = {
3036       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3037       CGF.Builder
3038           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3039                                                CGF.VoidPtrTy, CGF.Int8Ty)
3040           .getPointer()};
3041   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3042                                           std::end(CommonArgs));
3043   if (isOpenMPTaskLoopDirective(Kind)) {
3044     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3045     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3046     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3047     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3048     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3049     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3050     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3051     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3052     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3053     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3054     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3055     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3056     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3057     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3058     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3059     CallArgs.push_back(LBParam);
3060     CallArgs.push_back(UBParam);
3061     CallArgs.push_back(StParam);
3062     CallArgs.push_back(LIParam);
3063     CallArgs.push_back(RParam);
3064   }
3065   CallArgs.push_back(SharedsParam);
3066 
3067   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3068                                                   CallArgs);
3069   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3070                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3071   CGF.FinishFunction();
3072   return TaskEntry;
3073 }
3074 
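/// Emit the task destructor helper, which has the kmp_routine_entry_t
/// signature and runs the destructor of every destructible field of the
/// privates record. Roughly (a sketch of the helper built below):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // ~FieldTy() for each field of tt->privates with a destructed type
/// }
/// \endcode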
3075 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3076                                             SourceLocation Loc,
3077                                             QualType KmpInt32Ty,
3078                                             QualType KmpTaskTWithPrivatesPtrQTy,
3079                                             QualType KmpTaskTWithPrivatesQTy) {
3080   ASTContext &C = CGM.getContext();
3081   FunctionArgList Args;
3082   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3083                             ImplicitParamKind::Other);
3084   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3085                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3086                                 ImplicitParamKind::Other);
3087   Args.push_back(&GtidArg);
3088   Args.push_back(&TaskTypeArg);
3089   const auto &DestructorFnInfo =
3090       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3091   llvm::FunctionType *DestructorFnTy =
3092       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3093   std::string Name =
3094       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3095   auto *DestructorFn =
3096       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3097                              Name, &CGM.getModule());
3098   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3099                                     DestructorFnInfo);
3100   DestructorFn->setDoesNotRecurse();
3101   CodeGenFunction CGF(CGM);
3102   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3103                     Args, Loc, Loc);
3104 
3105   LValue Base = CGF.EmitLoadOfPointerLValue(
3106       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3107       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3108   const auto *KmpTaskTWithPrivatesQTyRD =
3109       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3110   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3111   Base = CGF.EmitLValueForField(Base, *FI);
3112   for (const auto *Field :
3113        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3114     if (QualType::DestructionKind DtorKind =
3115             Field->getType().isDestructedType()) {
3116       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3117       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3118     }
3119   }
3120   CGF.FinishFunction();
3121   return DestructorFn;
3122 }
3123 
3124 /// Emit a privates mapping function for correct handling of private and
3125 /// firstprivate variables.
3126 /// \code
3127 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3128 /// **noalias priv1,...,  <tyn> **noalias privn) {
3129 ///   *priv1 = &.privates.priv1;
3130 ///   ...;
3131 ///   *privn = &.privates.privn;
3132 /// }
3133 /// \endcode
3134 static llvm::Value *
3135 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3136                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3137                                ArrayRef<PrivateDataTy> Privates) {
3138   ASTContext &C = CGM.getContext();
3139   FunctionArgList Args;
3140   ImplicitParamDecl TaskPrivatesArg(
3141       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3142       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3143       ImplicitParamKind::Other);
3144   Args.push_back(&TaskPrivatesArg);
3145   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3146   unsigned Counter = 1;
3147   for (const Expr *E : Data.PrivateVars) {
3148     Args.push_back(ImplicitParamDecl::Create(
3149         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3150         C.getPointerType(C.getPointerType(E->getType()))
3151             .withConst()
3152             .withRestrict(),
3153         ImplicitParamKind::Other));
3154     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3155     PrivateVarsPos[VD] = Counter;
3156     ++Counter;
3157   }
3158   for (const Expr *E : Data.FirstprivateVars) {
3159     Args.push_back(ImplicitParamDecl::Create(
3160         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3161         C.getPointerType(C.getPointerType(E->getType()))
3162             .withConst()
3163             .withRestrict(),
3164         ImplicitParamKind::Other));
3165     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3166     PrivateVarsPos[VD] = Counter;
3167     ++Counter;
3168   }
3169   for (const Expr *E : Data.LastprivateVars) {
3170     Args.push_back(ImplicitParamDecl::Create(
3171         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172         C.getPointerType(C.getPointerType(E->getType()))
3173             .withConst()
3174             .withRestrict(),
3175         ImplicitParamKind::Other));
3176     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3177     PrivateVarsPos[VD] = Counter;
3178     ++Counter;
3179   }
3180   for (const VarDecl *VD : Data.PrivateLocals) {
3181     QualType Ty = VD->getType().getNonReferenceType();
3182     if (VD->getType()->isLValueReferenceType())
3183       Ty = C.getPointerType(Ty);
3184     if (isAllocatableDecl(VD))
3185       Ty = C.getPointerType(Ty);
3186     Args.push_back(ImplicitParamDecl::Create(
3187         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3188         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3189         ImplicitParamKind::Other));
3190     PrivateVarsPos[VD] = Counter;
3191     ++Counter;
3192   }
3193   const auto &TaskPrivatesMapFnInfo =
3194       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3195   llvm::FunctionType *TaskPrivatesMapTy =
3196       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3197   std::string Name =
3198       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3199   auto *TaskPrivatesMap = llvm::Function::Create(
3200       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3201       &CGM.getModule());
3202   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3203                                     TaskPrivatesMapFnInfo);
3204   if (CGM.getLangOpts().Optimize) {
3205     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3206     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3207     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3208   }
3209   CodeGenFunction CGF(CGM);
3210   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3211                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3212 
3213   // *privi = &.privates.privi;
3214   LValue Base = CGF.EmitLoadOfPointerLValue(
3215       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3216       TaskPrivatesArg.getType()->castAs<PointerType>());
3217   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3218   Counter = 0;
3219   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3220     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3221     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3222     LValue RefLVal =
3223         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3224     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3225         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3226     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3227     ++Counter;
3228   }
3229   CGF.FinishFunction();
3230   return TaskPrivatesMap;
3231 }
3232 
3233 /// Emit initialization for private variables in task-based directives.
3234 static void emitPrivatesInit(CodeGenFunction &CGF,
3235                              const OMPExecutableDirective &D,
3236                              Address KmpTaskSharedsPtr, LValue TDBase,
3237                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3238                              QualType SharedsTy, QualType SharedsPtrTy,
3239                              const OMPTaskDataTy &Data,
3240                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3241   ASTContext &C = CGF.getContext();
3242   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3243   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3244   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3245                                  ? OMPD_taskloop
3246                                  : OMPD_task;
3247   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3248   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3249   LValue SrcBase;
3250   bool IsTargetTask =
3251       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3252       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3253   // For target-based directives, skip the four firstprivate arrays
3254   // BasePointersArray, PointersArray, SizesArray, and MappersArray. The
3255   // original variables for these arrays are not captured; we get their addresses explicitly.
3256   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3257       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3258     SrcBase = CGF.MakeAddrLValue(
3259         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3260             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3261             CGF.ConvertTypeForMem(SharedsTy)),
3262         SharedsTy);
3263   }
3264   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3265   for (const PrivateDataTy &Pair : Privates) {
3266     // Do not initialize private locals.
3267     if (Pair.second.isLocalPrivate()) {
3268       ++FI;
3269       continue;
3270     }
3271     const VarDecl *VD = Pair.second.PrivateCopy;
3272     const Expr *Init = VD->getAnyInitializer();
3273     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3274                              !CGF.isTrivialInitializer(Init)))) {
3275       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3276       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3277         const VarDecl *OriginalVD = Pair.second.Original;
3278         // Check if the variable is the target-based BasePointersArray,
3279         // PointersArray, SizesArray, or MappersArray.
3280         LValue SharedRefLValue;
3281         QualType Type = PrivateLValue.getType();
3282         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3283         if (IsTargetTask && !SharedField) {
3284           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3285                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3286                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3287                          ->getNumParams() == 0 &&
3288                  isa<TranslationUnitDecl>(
3289                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3290                          ->getDeclContext()) &&
3291                  "Expected artificial target data variable.");
3292           SharedRefLValue =
3293               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3294         } else if (ForDup) {
3295           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3296           SharedRefLValue = CGF.MakeAddrLValue(
3297               SharedRefLValue.getAddress(CGF).withAlignment(
3298                   C.getDeclAlign(OriginalVD)),
3299               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3300               SharedRefLValue.getTBAAInfo());
3301         } else if (CGF.LambdaCaptureFields.count(
3302                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3303                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3304           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3305         } else {
3306           // Processing for implicitly captured variables.
3307           InlinedOpenMPRegionRAII Region(
3308               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3309               /*HasCancel=*/false, /*NoInheritance=*/true);
3310           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3311         }
3312         if (Type->isArrayType()) {
3313           // Initialize firstprivate array.
3314           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3315             // Perform simple memcpy.
3316             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3317           } else {
3318             // Initialize firstprivate array using element-by-element
3319             // initialization.
3320             CGF.EmitOMPAggregateAssign(
3321                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3322                 Type,
3323                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3324                                                   Address SrcElement) {
3325                   // Clean up any temporaries needed by the initialization.
3326                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3327                   InitScope.addPrivate(Elem, SrcElement);
3328                   (void)InitScope.Privatize();
3329                   // Emit initialization for single element.
3330                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3331                       CGF, &CapturesInfo);
3332                   CGF.EmitAnyExprToMem(Init, DestElement,
3333                                        Init->getType().getQualifiers(),
3334                                        /*IsInitializer=*/false);
3335                 });
3336           }
3337         } else {
3338           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3339           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3340           (void)InitScope.Privatize();
3341           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3342           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3343                              /*capturedByInit=*/false);
3344         }
3345       } else {
3346         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3347       }
3348     }
3349     ++FI;
3350   }
3351 }
3352 
3353 /// Check if a task duplication function is required for taskloops, i.e. if any private copy has a non-trivial initializer.
3354 static bool checkInitIsRequired(CodeGenFunction &CGF,
3355                                 ArrayRef<PrivateDataTy> Privates) {
3356   bool InitRequired = false;
3357   for (const PrivateDataTy &Pair : Privates) {
3358     if (Pair.second.isLocalPrivate())
3359       continue;
3360     const VarDecl *VD = Pair.second.PrivateCopy;
3361     const Expr *Init = VD->getAnyInitializer();
3362     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3363                                     !CGF.isTrivialInitializer(Init));
3364     if (InitRequired)
3365       break;
3366   }
3367   return InitRequired;
3368 }
3369 
3371 /// Emit task_dup function (for initialization of
3372 /// private/firstprivate/lastprivate vars and last_iter flag)
3373 /// \code
3374 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3375 /// lastpriv) {
3376 /// // setup lastprivate flag
3377 ///    task_dst->last = lastpriv;
3378 /// // could be constructor calls here...
3379 /// }
3380 /// \endcode
3381 static llvm::Value *
3382 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3383                     const OMPExecutableDirective &D,
3384                     QualType KmpTaskTWithPrivatesPtrQTy,
3385                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3386                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3387                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3388                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3389   ASTContext &C = CGM.getContext();
3390   FunctionArgList Args;
3391   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3392                            KmpTaskTWithPrivatesPtrQTy,
3393                            ImplicitParamKind::Other);
3394   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3395                            KmpTaskTWithPrivatesPtrQTy,
3396                            ImplicitParamKind::Other);
3397   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3398                                 ImplicitParamKind::Other);
3399   Args.push_back(&DstArg);
3400   Args.push_back(&SrcArg);
3401   Args.push_back(&LastprivArg);
3402   const auto &TaskDupFnInfo =
3403       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3404   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3405   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3406   auto *TaskDup = llvm::Function::Create(
3407       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3408   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3409   TaskDup->setDoesNotRecurse();
3410   CodeGenFunction CGF(CGM);
3411   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3412                     Loc);
3413 
3414   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3415       CGF.GetAddrOfLocalVar(&DstArg),
3416       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3417   // task_dst->liter = lastpriv;
3418   if (WithLastIter) {
3419     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3420     LValue Base = CGF.EmitLValueForField(
3421         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3422     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3423     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3424         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3425     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3426   }
3427 
3428   // Emit initial values for the private copies (Privates is never empty here).
3429   assert(!Privates.empty() && "Expected a non-empty list of privates.");
3430   Address KmpTaskSharedsPtr = Address::invalid();
3431   if (!Data.FirstprivateVars.empty()) {
3432     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3433         CGF.GetAddrOfLocalVar(&SrcArg),
3434         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3435     LValue Base = CGF.EmitLValueForField(
3436         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3437     KmpTaskSharedsPtr = Address(
3438         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3439                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3440                                                   KmpTaskTShareds)),
3441                              Loc),
3442         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3443   }
3444   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3445                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3446   CGF.FinishFunction();
3447   return TaskDup;
3448 }
3449 
3450 /// Checks if destructor function is required to be generated.
3451 /// \return true if cleanups are required, false otherwise.
3452 static bool
3453 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3454                          ArrayRef<PrivateDataTy> Privates) {
3455   for (const PrivateDataTy &P : Privates) {
3456     if (P.second.isLocalPrivate())
3457       continue;
3458     QualType Ty = P.second.Original->getType().getNonReferenceType();
3459     if (Ty.isDestructedType())
3460       return true;
3461   }
3462   return false;
3463 }
3464 
3465 namespace {
3466 /// Loop generator for OpenMP iterator expression.
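/// For example, for 'depend(iterator(i = 0 : n), in : a[i])' the scope wraps
/// the dependence-record emission in a loop of the shape (a sketch):
/// \code
/// for (counter = 0; counter < n; ++counter) {
///   i = begin + counter * step; // HelperData.Update
///   <code emitted while the scope is alive>
/// }
/// \endcode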
3467 class OMPIteratorGeneratorScope final
3468     : public CodeGenFunction::OMPPrivateScope {
3469   CodeGenFunction &CGF;
3470   const OMPIteratorExpr *E = nullptr;
3471   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3472   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3473   OMPIteratorGeneratorScope() = delete;
3474   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3475 
3476 public:
3477   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3478       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3479     if (!E)
3480       return;
3481     SmallVector<llvm::Value *, 4> Uppers;
3482     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3483       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3484       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3485       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3486       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3487       addPrivate(
3488           HelperData.CounterVD,
3489           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3490     }
3491     Privatize();
3492 
3493     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3494       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3495       LValue CLVal =
3496           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3497                              HelperData.CounterVD->getType());
3498       // Counter = 0;
3499       CGF.EmitStoreOfScalar(
3500           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3501           CLVal);
3502       CodeGenFunction::JumpDest &ContDest =
3503           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3504       CodeGenFunction::JumpDest &ExitDest =
3505           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3506       // N = <number-of_iterations>;
3507       llvm::Value *N = Uppers[I];
3508       // cont:
3509       // if (Counter < N) goto body; else goto exit;
3510       CGF.EmitBlock(ContDest.getBlock());
3511       auto *CVal =
3512           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3513       llvm::Value *Cmp =
3514           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3515               ? CGF.Builder.CreateICmpSLT(CVal, N)
3516               : CGF.Builder.CreateICmpULT(CVal, N);
3517       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3518       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3519       // body:
3520       CGF.EmitBlock(BodyBB);
3521       // Iteri = Begini + Counter * Stepi;
3522       CGF.EmitIgnoredExpr(HelperData.Update);
3523     }
3524   }
3525   ~OMPIteratorGeneratorScope() {
3526     if (!E)
3527       return;
3528     for (unsigned I = E->numOfIterators(); I > 0; --I) {
3529       // Counter = Counter + 1;
3530       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3531       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3532       // goto cont;
3533       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3534       // exit:
3535       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3536     }
3537   }
3538 };
3539 } // namespace
3540 
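/// Compute the base pointer and the size in bytes of the object referenced by
/// \p E. For an OpenMP array shaping expression ([m][n])p the size is
/// m * n * sizeof(*p); for an array section a[l:len] it is the distance from
/// the section's lower bound to one past its upper element; otherwise it is
/// the size of E's type.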
3541 static std::pair<llvm::Value *, llvm::Value *>
3542 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3543   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3544   llvm::Value *Addr;
3545   if (OASE) {
3546     const Expr *Base = OASE->getBase();
3547     Addr = CGF.EmitScalarExpr(Base);
3548   } else {
3549     Addr = CGF.EmitLValue(E).getPointer(CGF);
3550   }
3551   llvm::Value *SizeVal;
3552   QualType Ty = E->getType();
3553   if (OASE) {
3554     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3555     for (const Expr *SE : OASE->getDimensions()) {
3556       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3557       Sz = CGF.EmitScalarConversion(
3558           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3559       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3560     }
3561   } else if (const auto *ASE =
3562                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3563     LValue UpAddrLVal =
3564         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3565     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3566     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3567         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3568     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3569     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3570     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3571   } else {
3572     SizeVal = CGF.getTypeSize(Ty);
3573   }
3574   return std::make_pair(Addr, SizeVal);
3575 }
3576 
3577 /// Builds kmp_task_affinity_info_t, if it is not built yet, and the flags type.
3578 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3579   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3580   if (KmpTaskAffinityInfoTy.isNull()) {
3581     RecordDecl *KmpAffinityInfoRD =
3582         C.buildImplicitRecord("kmp_task_affinity_info_t");
3583     KmpAffinityInfoRD->startDefinition();
3584     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3585     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3586     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3587     KmpAffinityInfoRD->completeDefinition();
3588     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3589   }
3590 }
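// The record built above is assumed to mirror the runtime's
// kmp_task_affinity_info layout:
//   struct { intptr_t base_addr; size_t len; uint32_t flags; };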
3591 
3592 CGOpenMPRuntime::TaskResultTy
3593 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3594                               const OMPExecutableDirective &D,
3595                               llvm::Function *TaskFunction, QualType SharedsTy,
3596                               Address Shareds, const OMPTaskDataTy &Data) {
3597   ASTContext &C = CGM.getContext();
3598   llvm::SmallVector<PrivateDataTy, 4> Privates;
3599   // Aggregate privates and sort them by alignment, in descending order.
3600   const auto *I = Data.PrivateCopies.begin();
3601   for (const Expr *E : Data.PrivateVars) {
3602     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3603     Privates.emplace_back(
3604         C.getDeclAlign(VD),
3605         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3606                          /*PrivateElemInit=*/nullptr));
3607     ++I;
3608   }
3609   I = Data.FirstprivateCopies.begin();
3610   const auto *IElemInitRef = Data.FirstprivateInits.begin();
3611   for (const Expr *E : Data.FirstprivateVars) {
3612     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3613     Privates.emplace_back(
3614         C.getDeclAlign(VD),
3615         PrivateHelpersTy(
3616             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3617             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3618     ++I;
3619     ++IElemInitRef;
3620   }
3621   I = Data.LastprivateCopies.begin();
3622   for (const Expr *E : Data.LastprivateVars) {
3623     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3624     Privates.emplace_back(
3625         C.getDeclAlign(VD),
3626         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627                          /*PrivateElemInit=*/nullptr));
3628     ++I;
3629   }
3630   for (const VarDecl *VD : Data.PrivateLocals) {
3631     if (isAllocatableDecl(VD))
3632       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3633     else
3634       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3635   }
3636   llvm::stable_sort(Privates,
3637                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
3638                       return L.first > R.first;
3639                     });
3640   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3641   // Build type kmp_routine_entry_t (if not built yet).
3642   emitKmpRoutineEntryT(KmpInt32Ty);
3643   // Build type kmp_task_t (if not built yet).
3644   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3645     if (SavedKmpTaskloopTQTy.isNull()) {
3646       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3647           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3648     }
3649     KmpTaskTQTy = SavedKmpTaskloopTQTy;
3650   } else {
3651     assert((D.getDirectiveKind() == OMPD_task ||
3652             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3653             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3654            "Expected taskloop, task or target directive");
3655     if (SavedKmpTaskTQTy.isNull()) {
3656       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3657           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3658     }
3659     KmpTaskTQTy = SavedKmpTaskTQTy;
3660   }
3661   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3662   // Build particular struct kmp_task_t for the given task.
3663   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3664       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3665   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3666   QualType KmpTaskTWithPrivatesPtrQTy =
3667       C.getPointerType(KmpTaskTWithPrivatesQTy);
3668   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3669   llvm::Type *KmpTaskTWithPrivatesPtrTy =
3670       KmpTaskTWithPrivatesTy->getPointerTo();
3671   llvm::Value *KmpTaskTWithPrivatesTySize =
3672       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3673   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3674 
3675   // Build the function mapping private copies to their addresses (if any).
3676   llvm::Value *TaskPrivatesMap = nullptr;
3677   llvm::Type *TaskPrivatesMapTy =
3678       std::next(TaskFunction->arg_begin(), 3)->getType();
3679   if (!Privates.empty()) {
3680     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3681     TaskPrivatesMap =
3682         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3683     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3684         TaskPrivatesMap, TaskPrivatesMapTy);
3685   } else {
3686     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3687         cast<llvm::PointerType>(TaskPrivatesMapTy));
3688   }
3689   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3690   // kmp_task_t *tt);
3691   llvm::Function *TaskEntry = emitProxyTaskFunction(
3692       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3693       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3694       TaskPrivatesMap);
3695 
3696   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3697   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3698   // kmp_routine_entry_t *task_entry);
3699   // Task flags. The format is taken from
3700   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h;
3701   // see the description of the kmp_tasking_flags struct.
3702   enum {
3703     TiedFlag = 0x1,
3704     FinalFlag = 0x2,
3705     DestructorsFlag = 0x8,
3706     PriorityFlag = 0x20,
3707     DetachableFlag = 0x40,
3708   };
3709   unsigned Flags = Data.Tied ? TiedFlag : 0;
3710   bool NeedsCleanup = false;
3711   if (!Privates.empty()) {
3712     NeedsCleanup =
3713         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3714     if (NeedsCleanup)
3715       Flags = Flags | DestructorsFlag;
3716   }
3717   if (Data.Priority.getInt())
3718     Flags = Flags | PriorityFlag;
3719   if (D.hasClausesOfKind<OMPDetachClause>())
3720     Flags = Flags | DetachableFlag;
3721   llvm::Value *TaskFlags =
3722       Data.Final.getPointer()
3723           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3724                                      CGF.Builder.getInt32(FinalFlag),
3725                                      CGF.Builder.getInt32(/*C=*/0))
3726           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3727   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3728   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3729   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3730       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3731       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3732           TaskEntry, KmpRoutineEntryPtrTy)};
3733   llvm::Value *NewTask;
3734   if (D.hasClausesOfKind<OMPNowaitClause>()) {
3735     // Check if we have any device clause associated with the directive.
3736     const Expr *Device = nullptr;
3737     if (auto *C = D.getSingleClause<OMPDeviceClause>())
3738       Device = C->getDevice();
3739     // Emit the device ID if present, otherwise use the default value.
3740     llvm::Value *DeviceID;
3741     if (Device)
3742       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3743                                            CGF.Int64Ty, /*isSigned=*/true);
3744     else
3745       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3746     AllocArgs.push_back(DeviceID);
3747     NewTask = CGF.EmitRuntimeCall(
3748         OMPBuilder.getOrCreateRuntimeFunction(
3749             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3750         AllocArgs);
3751   } else {
3752     NewTask =
3753         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3754                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3755                             AllocArgs);
3756   }
3757   // Emit detach clause initialization.
3758   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3759   // task_descriptor);
3760   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3761     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3762     LValue EvtLVal = CGF.EmitLValue(Evt);
3763 
3764     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3765     // int gtid, kmp_task_t *task);
3766     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3767     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3768     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3769     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3770         OMPBuilder.getOrCreateRuntimeFunction(
3771             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3772         {Loc, Tid, NewTask});
3773     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3774                                       Evt->getExprLoc());
3775     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3776   }
3777   // Process affinity clauses.
3778   if (D.hasClausesOfKind<OMPAffinityClause>()) {
3779     // Process list of affinity data.
3780     ASTContext &C = CGM.getContext();
3781     Address AffinitiesArray = Address::invalid();
3782     // Calculate number of elements to form the array of affinity data.
3783     llvm::Value *NumOfElements = nullptr;
3784     unsigned NumAffinities = 0;
3785     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3786       if (const Expr *Modifier = C->getModifier()) {
3787         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3788         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3789           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3790           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3791           NumOfElements =
3792               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3793         }
3794       } else {
3795         NumAffinities += C->varlist_size();
3796       }
3797     }
3798     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3799     // Field ids in the kmp_task_affinity_info record.
3800     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3801 
3802     QualType KmpTaskAffinityInfoArrayTy;
3803     if (NumOfElements) {
3804       NumOfElements = CGF.Builder.CreateNUWAdd(
3805           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3806       auto *OVE = new (C) OpaqueValueExpr(
3807           Loc,
3808           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3809           VK_PRValue);
3810       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3811                                                     RValue::get(NumOfElements));
3812       KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3813           KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3814           /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3815       // Properly emit variable-sized array.
3816       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3817                                            ImplicitParamKind::Other);
3818       CGF.EmitVarDecl(*PD);
3819       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3820       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3821                                                 /*isSigned=*/false);
3822     } else {
3823       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3824           KmpTaskAffinityInfoTy,
3825           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3826           ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3827       AffinitiesArray =
3828           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3829       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3830       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3831                                              /*isSigned=*/false);
3832     }
3833 
3834     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3835     // Fill the array with elements that have no iterator modifier.
3836     unsigned Pos = 0;
3837     bool HasIterator = false;
3838     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3839       if (C->getModifier()) {
3840         HasIterator = true;
3841         continue;
3842       }
3843       for (const Expr *E : C->varlists()) {
3844         llvm::Value *Addr;
3845         llvm::Value *Size;
3846         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3847         LValue Base =
3848             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3849                                KmpTaskAffinityInfoTy);
3850         // affs[i].base_addr = &<Affinities[i].second>;
3851         LValue BaseAddrLVal = CGF.EmitLValueForField(
3852             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3853         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3854                               BaseAddrLVal);
3855         // affs[i].len = sizeof(<Affinities[i].second>);
3856         LValue LenLVal = CGF.EmitLValueForField(
3857             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3858         CGF.EmitStoreOfScalar(Size, LenLVal);
3859         ++Pos;
3860       }
3861     }
3862     LValue PosLVal;
3863     if (HasIterator) {
3864       PosLVal = CGF.MakeAddrLValue(
3865           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3866           C.getSizeType());
3867       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3868     }
3869     // Process elements with iterators.
3870     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3871       const Expr *Modifier = C->getModifier();
3872       if (!Modifier)
3873         continue;
3874       OMPIteratorGeneratorScope IteratorScope(
3875           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3876       for (const Expr *E : C->varlists()) {
3877         llvm::Value *Addr;
3878         llvm::Value *Size;
3879         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3880         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3881         LValue Base = CGF.MakeAddrLValue(
3882             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
3883         // affs[i].base_addr = &<Affinities[i].second>;
3884         LValue BaseAddrLVal = CGF.EmitLValueForField(
3885             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3886         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3887                               BaseAddrLVal);
3888         // affs[i].len = sizeof(<Affinities[i].second>);
3889         LValue LenLVal = CGF.EmitLValueForField(
3890             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3891         CGF.EmitStoreOfScalar(Size, LenLVal);
3892         Idx = CGF.Builder.CreateNUWAdd(
3893             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3894         CGF.EmitStoreOfScalar(Idx, PosLVal);
3895       }
3896     }
3897     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3898     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3899     // naffins, kmp_task_affinity_info_t *affin_list);
3900     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3901     llvm::Value *GTid = getThreadID(CGF, Loc);
3902     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3903         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
3904     // FIXME: Emit the call and ignore its result for now, until the
3905     // runtime function is properly implemented.
3906     (void)CGF.EmitRuntimeCall(
3907         OMPBuilder.getOrCreateRuntimeFunction(
3908             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3909         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3910   }
3911   llvm::Value *NewTaskNewTaskTTy =
3912       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3913           NewTask, KmpTaskTWithPrivatesPtrTy);
3914   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3915                                                KmpTaskTWithPrivatesQTy);
3916   LValue TDBase =
3917       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3918   // Fill the data in the resulting kmp_task_t record.
3919   // Copy shareds if there are any.
3920   Address KmpTaskSharedsPtr = Address::invalid();
3921   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3922     KmpTaskSharedsPtr = Address(
3923         CGF.EmitLoadOfScalar(
3924             CGF.EmitLValueForField(
3925                 TDBase,
3926                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3927             Loc),
3928         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3929     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3930     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3931     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3932   }
3933   // Emit initial values for private copies (if any).
3934   TaskResultTy Result;
3935   if (!Privates.empty()) {
3936     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3937                      SharedsTy, SharedsPtrTy, Data, Privates,
3938                      /*ForDup=*/false);
3939     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3940         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3941       Result.TaskDupFn = emitTaskDupFunction(
3942           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3943           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3944           /*WithLastIter=*/!Data.LastprivateVars.empty());
3945     }
3946   }
3947   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3948   enum { Priority = 0, Destructors = 1 };
3949   // Provide pointer to function with destructors for privates.
3950   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3951   const RecordDecl *KmpCmplrdataUD =
3952       (*FI)->getType()->getAsUnionType()->getDecl();
3953   if (NeedsCleanup) {
3954     llvm::Value *DestructorFn = emitDestructorsFunction(
3955         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3956         KmpTaskTWithPrivatesQTy);
3957     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3958     LValue DestructorsLV = CGF.EmitLValueForField(
3959         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3960     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3961                               DestructorFn, KmpRoutineEntryPtrTy),
3962                           DestructorsLV);
3963   }
3964   // Set priority.
3965   if (Data.Priority.getInt()) {
3966     LValue Data2LV = CGF.EmitLValueForField(
3967         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3968     LValue PriorityLV = CGF.EmitLValueForField(
3969         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3970     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3971   }
3972   Result.NewTask = NewTask;
3973   Result.TaskEntry = TaskEntry;
3974   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3975   Result.TDBase = TDBase;
3976   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3977   return Result;
3978 }
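
// Rough pseudo-C for the sequence emitted above for '#pragma omp task'
// (names are illustrative; the 'nowait' target variant additionally passes a
// device id to __kmpc_omp_target_task_alloc):
//   kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, flags,
//                                         sizeof(kmp_task_t_with_privates),
//                                         sizeof(shareds), .omp_task_entry.);
//   memcpy(t->shareds, &captured, sizeof(shareds)); // if any shareds
//   t->data1.destructors = .omp_task_destructor.;   // if cleanups required
//   t->data2.priority = priority;                   // if priority clause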
3979 
3980 /// Translates internal dependency kind into the runtime kind.
3981 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3982   RTLDependenceKindTy DepKind;
3983   switch (K) {
3984   case OMPC_DEPEND_in:
3985     DepKind = RTLDependenceKindTy::DepIn;
3986     break;
3987   // Out and InOut dependencies must use the same code.
3988   case OMPC_DEPEND_out:
3989   case OMPC_DEPEND_inout:
3990     DepKind = RTLDependenceKindTy::DepInOut;
3991     break;
3992   case OMPC_DEPEND_mutexinoutset:
3993     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3994     break;
3995   case OMPC_DEPEND_inoutset:
3996     DepKind = RTLDependenceKindTy::DepInOutSet;
3997     break;
3998   case OMPC_DEPEND_outallmemory:
3999     DepKind = RTLDependenceKindTy::DepOmpAllMem;
4000     break;
4001   case OMPC_DEPEND_source:
4002   case OMPC_DEPEND_sink:
4003   case OMPC_DEPEND_depobj:
4004   case OMPC_DEPEND_inoutallmemory:
4005   case OMPC_DEPEND_unknown:
4006     llvm_unreachable("Unknown task dependence type");
4007   }
4008   return DepKind;
4009 }
4010 
4011 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4012 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4013                            QualType &FlagsTy) {
4014   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4015   if (KmpDependInfoTy.isNull()) {
4016     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4017     KmpDependInfoRD->startDefinition();
4018     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4019     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4020     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4021     KmpDependInfoRD->completeDefinition();
4022     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4023   }
4024 }
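
// The record built above is assumed to match the runtime's dependency
// descriptor, roughly (the flags field is bool-sized; see kmp.h):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t   len;
//     uint8_t  flags;
//   };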
4025 
4026 std::pair<llvm::Value *, LValue>
4027 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4028                                    SourceLocation Loc) {
4029   ASTContext &C = CGM.getContext();
4030   QualType FlagsTy;
4031   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4032   RecordDecl *KmpDependInfoRD =
4033       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4034   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4035   LValue Base = CGF.EmitLoadOfPointerLValue(
4036       DepobjLVal.getAddress(CGF).withElementType(
4037           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4038       KmpDependInfoPtrTy->castAs<PointerType>());
4039   Address DepObjAddr = CGF.Builder.CreateGEP(
4040       Base.getAddress(CGF),
4041       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4042   LValue NumDepsBase = CGF.MakeAddrLValue(
4043       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4044   // NumDeps = deps[-1].base_addr;
4045   LValue BaseAddrLVal = CGF.EmitLValueForField(
4046       NumDepsBase,
4047       *std::next(KmpDependInfoRD->field_begin(),
4048                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4049   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4050   return std::make_pair(NumDeps, Base);
4051 }
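
// Depobj memory layout assumed by the helper above (a sketch): the pointer
// stored in the depobj variable points at the first real element, and the
// element count is kept in the base_addr field of the slot just before it:
//   [ count | dep0 | dep1 | ... ]
//             ^-- depobj pointer
// so the count is read from deps[-1].base_addr.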
4052 
4053 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4054                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4055                            const OMPTaskDataTy::DependData &Data,
4056                            Address DependenciesArray) {
4057   CodeGenModule &CGM = CGF.CGM;
4058   ASTContext &C = CGM.getContext();
4059   QualType FlagsTy;
4060   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4061   RecordDecl *KmpDependInfoRD =
4062       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4063   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4064 
4065   OMPIteratorGeneratorScope IteratorScope(
4066       CGF, cast_or_null<OMPIteratorExpr>(
4067                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4068                                  : nullptr));
4069   for (const Expr *E : Data.DepExprs) {
4070     llvm::Value *Addr;
4071     llvm::Value *Size;
4072 
4073     // The expression will be a nullptr in the 'omp_all_memory' case.
4074     if (E) {
4075       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4076       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4077     } else {
4078       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4079       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4080     }
4081     LValue Base;
4082     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4083       Base = CGF.MakeAddrLValue(
4084           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4085     } else {
4086       assert(E && "Expected a non-null expression");
4087       LValue &PosLVal = *Pos.get<LValue *>();
4088       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4089       Base = CGF.MakeAddrLValue(
4090           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4091     }
4092     // deps[i].base_addr = &<Dependencies[i].second>;
4093     LValue BaseAddrLVal = CGF.EmitLValueForField(
4094         Base,
4095         *std::next(KmpDependInfoRD->field_begin(),
4096                    static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4097     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4098     // deps[i].len = sizeof(<Dependencies[i].second>);
4099     LValue LenLVal = CGF.EmitLValueForField(
4100         Base, *std::next(KmpDependInfoRD->field_begin(),
4101                          static_cast<unsigned int>(RTLDependInfoFields::Len)));
4102     CGF.EmitStoreOfScalar(Size, LenLVal);
4103     // deps[i].flags = <Dependencies[i].first>;
4104     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4105     LValue FlagsLVal = CGF.EmitLValueForField(
4106         Base,
4107         *std::next(KmpDependInfoRD->field_begin(),
4108                    static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4109     CGF.EmitStoreOfScalar(
4110         llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4111         FlagsLVal);
4112     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4113       ++(*P);
4114     } else {
4115       LValue &PosLVal = *Pos.get<LValue *>();
4116       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4117       Idx = CGF.Builder.CreateNUWAdd(Idx,
4118                                      llvm::ConstantInt::get(Idx->getType(), 1));
4119       CGF.EmitStoreOfScalar(Idx, PosLVal);
4120     }
4121   }
4122 }
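
// Per dependency, the loop above stores, in pseudo-C:
//   deps[pos].base_addr = (intptr_t)&expr; // 0 for 'omp_all_memory'
//   deps[pos].len       = sizeof(expr);    // 0 for 'omp_all_memory'
//   deps[pos].flags     = translateDependencyKind(kind);
//   ++pos; // a compile-time counter, or an in-memory one under iterators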
4123 
4124 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4125     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4126     const OMPTaskDataTy::DependData &Data) {
4127   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4128          "Expected depobj dependency kind.");
4129   SmallVector<llvm::Value *, 4> Sizes;
4130   SmallVector<LValue, 4> SizeLVals;
4131   ASTContext &C = CGF.getContext();
4132   {
4133     OMPIteratorGeneratorScope IteratorScope(
4134         CGF, cast_or_null<OMPIteratorExpr>(
4135                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4136                                    : nullptr));
4137     for (const Expr *E : Data.DepExprs) {
4138       llvm::Value *NumDeps;
4139       LValue Base;
4140       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4141       std::tie(NumDeps, Base) =
4142           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4143       LValue NumLVal = CGF.MakeAddrLValue(
4144           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4145           C.getUIntPtrType());
4146       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4147                               NumLVal.getAddress(CGF));
4148       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4149       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4150       CGF.EmitStoreOfScalar(Add, NumLVal);
4151       SizeLVals.push_back(NumLVal);
4152     }
4153   }
4154   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4155     llvm::Value *Size =
4156         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4157     Sizes.push_back(Size);
4158   }
4159   return Sizes;
4160 }
4161 
4162 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4163                                          QualType &KmpDependInfoTy,
4164                                          LValue PosLVal,
4165                                          const OMPTaskDataTy::DependData &Data,
4166                                          Address DependenciesArray) {
4167   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4168          "Expected depobj dependency kind.");
4169   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4170   {
4171     OMPIteratorGeneratorScope IteratorScope(
4172         CGF, cast_or_null<OMPIteratorExpr>(
4173                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4174                                    : nullptr));
4175     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4176       const Expr *E = Data.DepExprs[I];
4177       llvm::Value *NumDeps;
4178       LValue Base;
4179       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4180       std::tie(NumDeps, Base) =
4181           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4182 
4183       // Memcpy the dependency data.
4184       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4185           ElSize,
4186           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4187       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4188       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4189       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4190 
4191       // Increase pos.
4192       // pos += numDeps;
4193       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4194       CGF.EmitStoreOfScalar(Add, PosLVal);
4195     }
4196   }
4197 }
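
// In pseudo-C, each depobj payload is block-copied into the merged array:
//   memcpy(&deps[pos], depobj_ptr, numDeps * sizeof(kmp_depend_info));
//   pos += numDeps;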
4198 
4199 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4200     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4201     SourceLocation Loc) {
4202   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4203         return D.DepExprs.empty();
4204       }))
4205     return std::make_pair(nullptr, Address::invalid());
4206   // Process list of dependencies.
4207   ASTContext &C = CGM.getContext();
4208   Address DependenciesArray = Address::invalid();
4209   llvm::Value *NumOfElements = nullptr;
4210   unsigned NumDependencies = std::accumulate(
4211       Dependencies.begin(), Dependencies.end(), 0,
4212       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4213         return D.DepKind == OMPC_DEPEND_depobj
4214                    ? V
4215                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4216       });
4217   QualType FlagsTy;
4218   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4219   bool HasDepobjDeps = false;
4220   bool HasRegularWithIterators = false;
4221   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4222   llvm::Value *NumOfRegularWithIterators =
4223       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4224   // Calculate number of depobj dependencies and regular deps with the
4225   // iterators.
4226   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4227     if (D.DepKind == OMPC_DEPEND_depobj) {
4228       SmallVector<llvm::Value *, 4> Sizes =
4229           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4230       for (llvm::Value *Size : Sizes) {
4231         NumOfDepobjElements =
4232             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4233       }
4234       HasDepobjDeps = true;
4235       continue;
4236     }
4237     // Include number of iterations, if any.
4238 
4239     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4240       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4241         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4242         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4243         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4244             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4245         NumOfRegularWithIterators =
4246             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4247       }
4248       HasRegularWithIterators = true;
4249       continue;
4250     }
4251   }
4252 
4253   QualType KmpDependInfoArrayTy;
4254   if (HasDepobjDeps || HasRegularWithIterators) {
4255     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4256                                            /*isSigned=*/false);
4257     if (HasDepobjDeps) {
4258       NumOfElements =
4259           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4260     }
4261     if (HasRegularWithIterators) {
4262       NumOfElements =
4263           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4264     }
4265     auto *OVE = new (C) OpaqueValueExpr(
4266         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4267         VK_PRValue);
4268     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4269                                                   RValue::get(NumOfElements));
4270     KmpDependInfoArrayTy =
4271         C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4272                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4273     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4274     // Properly emit variable-sized array.
4275     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4276                                          ImplicitParamKind::Other);
4277     CGF.EmitVarDecl(*PD);
4278     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4279     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4280                                               /*isSigned=*/false);
4281   } else {
4282     KmpDependInfoArrayTy = C.getConstantArrayType(
4283         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4284         ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4285     DependenciesArray =
4286         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4287     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4288     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4289                                            /*isSigned=*/false);
4290   }
4291   unsigned Pos = 0;
4292   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4293     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4294         Dependencies[I].IteratorExpr)
4295       continue;
4296     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4297                    DependenciesArray);
4298   }
4299   // Copy regular dependencies with iterators.
4300   LValue PosLVal = CGF.MakeAddrLValue(
4301       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4302   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4303   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4304     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4305         !Dependencies[I].IteratorExpr)
4306       continue;
4307     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4308                    DependenciesArray);
4309   }
4310   // Copy final depobj arrays without iterators.
4311   if (HasDepobjDeps) {
4312     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4313       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4314         continue;
4315       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4316                          DependenciesArray);
4317     }
4318   }
4319   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4320       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4321   return std::make_pair(NumOfElements, DependenciesArray);
4322 }
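
// Summary of the sizing logic above: the emitted element count is
//   numRegularDepsWithoutIterators          // compile-time constant
//   + Sum(iterationCount * numExprs)        // per iterator clause, runtime
//   + Sum(depobj element counts);           // per depobj clause, runtime
// a constant array suffices when only the first term is present; otherwise a
// variable-sized array is emitted.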
4323 
4324 Address CGOpenMPRuntime::emitDepobjDependClause(
4325     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4326     SourceLocation Loc) {
4327   if (Dependencies.DepExprs.empty())
4328     return Address::invalid();
4329   // Process list of dependencies.
4330   ASTContext &C = CGM.getContext();
4331   Address DependenciesArray = Address::invalid();
4332   unsigned NumDependencies = Dependencies.DepExprs.size();
4333   QualType FlagsTy;
4334   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4335   RecordDecl *KmpDependInfoRD =
4336       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4337 
4338   llvm::Value *Size;
4339   // Define type kmp_depend_info[<Dependencies.size()>];
4340   // For depobj reserve one extra element to store the number of elements.
4341   // It is required to handle depobj(x) update(in) construct.
4342   // kmp_depend_info[<Dependencies.size()>] deps;
4343   llvm::Value *NumDepsVal;
4344   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4345   if (const auto *IE =
4346           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4347     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4348     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4349       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4350       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4351       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4352     }
4353     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4354                                     NumDepsVal);
4355     CharUnits SizeInBytes =
4356         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4357     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4358     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4359     NumDepsVal =
4360         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4361   } else {
4362     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4363         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4364         nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4365     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4366     Size = CGM.getSize(Sz.alignTo(Align));
4367     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4368   }
4369   // The array needs to be allocated in dynamic memory.
4370   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4371   // Use default allocator.
4372   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4373   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4374 
4375   llvm::Value *Addr =
4376       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4377                               CGM.getModule(), OMPRTL___kmpc_alloc),
4378                           Args, ".dep.arr.addr");
4379   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4380   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4381       Addr, KmpDependInfoLlvmTy->getPointerTo());
4382   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4383   // Write the number of elements into the first array element for depobj.
4384   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4385   // deps[i].base_addr = NumDependencies;
4386   LValue BaseAddrLVal = CGF.EmitLValueForField(
4387       Base,
4388       *std::next(KmpDependInfoRD->field_begin(),
4389                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4390   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4391   llvm::PointerUnion<unsigned *, LValue *> Pos;
4392   unsigned Idx = 1;
4393   LValue PosLVal;
4394   if (Dependencies.IteratorExpr) {
4395     PosLVal = CGF.MakeAddrLValue(
4396         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4397         C.getSizeType());
4398     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4399                           /*IsInit=*/true);
4400     Pos = &PosLVal;
4401   } else {
4402     Pos = &Idx;
4403   }
4404   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4405   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4406       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4407       CGF.Int8Ty);
4408   return DependenciesArray;
4409 }
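
// Sketch of what the code above emits for creating a depobj (pseudo-C):
//   kmp_depend_info *p =
//       __kmpc_alloc(gtid, (numDeps + 1) * recSize, /*allocator=*/nullptr);
//   p[0].base_addr = numDeps; // hidden count element
//   ... fill p[1..numDeps] via emitDependData ...
//   depobj = (void *)&p[1];   // the shifted pointer is what gets stored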
4410 
4411 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4412                                         SourceLocation Loc) {
4413   ASTContext &C = CGM.getContext();
4414   QualType FlagsTy;
4415   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4416   LValue Base = CGF.EmitLoadOfPointerLValue(
4417       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4418   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4419   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4420       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4421       CGF.ConvertTypeForMem(KmpDependInfoTy));
4422   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4423       Addr.getElementType(), Addr.getPointer(),
4424       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4425   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4426                                                                CGF.VoidPtrTy);
4427   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4428   // Use default allocator.
4429   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4430   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4431 
4432   // __kmpc_free(gtid, addr, nullptr);
4433   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4434                                 CGM.getModule(), OMPRTL___kmpc_free),
4435                             Args);
4436 }
4437 
4438 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4439                                        OpenMPDependClauseKind NewDepKind,
4440                                        SourceLocation Loc) {
4441   ASTContext &C = CGM.getContext();
4442   QualType FlagsTy;
4443   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4444   RecordDecl *KmpDependInfoRD =
4445       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4446   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4447   llvm::Value *NumDeps;
4448   LValue Base;
4449   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4450 
4451   Address Begin = Base.getAddress(CGF);
4452   // Compute the pointer one past the last element of the array.
4453   llvm::Value *End = CGF.Builder.CreateGEP(
4454       Begin.getElementType(), Begin.getPointer(), NumDeps);
4455   // The basic structure here is a while-do loop.
4456   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4457   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4458   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4459   CGF.EmitBlock(BodyBB);
4460   llvm::PHINode *ElementPHI =
4461       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4462   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4463   Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4464   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4465                             Base.getTBAAInfo());
4466   // deps[i].flags = NewDepKind;
4467   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4468   LValue FlagsLVal = CGF.EmitLValueForField(
4469       Base, *std::next(KmpDependInfoRD->field_begin(),
4470                        static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4471   CGF.EmitStoreOfScalar(
4472       llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4473       FlagsLVal);
4474 
4475   // Shift the address forward by one element.
4476   Address ElementNext =
4477       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4478   ElementPHI->addIncoming(ElementNext.getPointer(),
4479                           CGF.Builder.GetInsertBlock());
4480   llvm::Value *IsEmpty =
4481       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4482   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4483   // Done.
4484   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4485 }
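
// The update loop built above is, in pseudo-C (it runs at least once, which
// is fine because a depobj always holds at least one dependency):
//   kmp_depend_info *d = deps;
//   do { d->flags = newKind; ++d; } while (d != deps + numDeps);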
4486 
4487 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4488                                    const OMPExecutableDirective &D,
4489                                    llvm::Function *TaskFunction,
4490                                    QualType SharedsTy, Address Shareds,
4491                                    const Expr *IfCond,
4492                                    const OMPTaskDataTy &Data) {
4493   if (!CGF.HaveInsertPoint())
4494     return;
4495 
4496   TaskResultTy Result =
4497       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4498   llvm::Value *NewTask = Result.NewTask;
4499   llvm::Function *TaskEntry = Result.TaskEntry;
4500   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4501   LValue TDBase = Result.TDBase;
4502   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4503   // Process list of dependences.
4504   Address DependenciesArray = Address::invalid();
4505   llvm::Value *NumOfElements;
4506   std::tie(NumOfElements, DependenciesArray) =
4507       emitDependClause(CGF, Data.Dependences, Loc);
4508 
4509   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4510   // libcall.
4511   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4512   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4513   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
4514   // dependence list is not empty.
4515   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4516   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4517   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4518   llvm::Value *DepTaskArgs[7];
4519   if (!Data.Dependences.empty()) {
4520     DepTaskArgs[0] = UpLoc;
4521     DepTaskArgs[1] = ThreadID;
4522     DepTaskArgs[2] = NewTask;
4523     DepTaskArgs[3] = NumOfElements;
4524     DepTaskArgs[4] = DependenciesArray.getPointer();
4525     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4526     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4527   }
4528   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4529                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4530     if (!Data.Tied) {
4531       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4532       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4533       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4534     }
4535     if (!Data.Dependences.empty()) {
4536       CGF.EmitRuntimeCall(
4537           OMPBuilder.getOrCreateRuntimeFunction(
4538               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4539           DepTaskArgs);
4540     } else {
4541       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4542                               CGM.getModule(), OMPRTL___kmpc_omp_task),
4543                           TaskArgs);
4544     }
4545     // Check if the parent region is untied and build a return for the untied task.
4546     if (auto *Region =
4547             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4548       Region->emitUntiedSwitch(CGF);
4549   };
4550 
4551   llvm::Value *DepWaitTaskArgs[7];
4552   if (!Data.Dependences.empty()) {
4553     DepWaitTaskArgs[0] = UpLoc;
4554     DepWaitTaskArgs[1] = ThreadID;
4555     DepWaitTaskArgs[2] = NumOfElements;
4556     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4557     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4558     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4559     DepWaitTaskArgs[6] =
4560         llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4561   }
4562   auto &M = CGM.getModule();
4563   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4564                         TaskEntry, &Data, &DepWaitTaskArgs,
4565                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4566     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4567     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4568     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4569     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4570     // is specified.
4571     if (!Data.Dependences.empty())
4572       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4573                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
4574                           DepWaitTaskArgs);
4575     // Call proxy_task_entry(gtid, new_task);
4576     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4577                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4578       Action.Enter(CGF);
4579       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4580       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4581                                                           OutlinedFnArgs);
4582     };
4583 
4584     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4585     // kmp_task_t *new_task);
4586     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4587     // kmp_task_t *new_task);
4588     RegionCodeGenTy RCG(CodeGen);
4589     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4590                               M, OMPRTL___kmpc_omp_task_begin_if0),
4591                           TaskArgs,
4592                           OMPBuilder.getOrCreateRuntimeFunction(
4593                               M, OMPRTL___kmpc_omp_task_complete_if0),
4594                           TaskArgs);
4595     RCG.setAction(Action);
4596     RCG(CGF);
4597   };
4598 
4599   if (IfCond) {
4600     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4601   } else {
4602     RegionCodeGenTy ThenRCG(ThenCodeGen);
4603     ThenRCG(CGF);
4604   }
4605 }
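
// Overall lowering shape for '#pragma omp task if(cond)' (a sketch):
//   if (cond) {
//     __kmpc_omp_task(loc, gtid, t);           // or ..._task_with_deps
//   } else {
//     __kmpc_omp_taskwait_deps_51(...);        // if dependences are present
//     __kmpc_omp_task_begin_if0(loc, gtid, t);
//     .omp_task_entry.(gtid, t);               // run the task immediately
//     __kmpc_omp_task_complete_if0(loc, gtid, t);
//   }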
4606 
4607 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4608                                        const OMPLoopDirective &D,
4609                                        llvm::Function *TaskFunction,
4610                                        QualType SharedsTy, Address Shareds,
4611                                        const Expr *IfCond,
4612                                        const OMPTaskDataTy &Data) {
4613   if (!CGF.HaveInsertPoint())
4614     return;
4615   TaskResultTy Result =
4616       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4617   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4618   // libcall.
4619   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4620   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4621   // sched, kmp_uint64 grainsize, void *task_dup);
4622   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4623   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4624   llvm::Value *IfVal;
4625   if (IfCond) {
4626     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4627                                       /*isSigned=*/true);
4628   } else {
4629     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4630   }
4631 
4632   LValue LBLVal = CGF.EmitLValueForField(
4633       Result.TDBase,
4634       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4635   const auto *LBVar =
4636       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4637   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4638                        LBLVal.getQuals(),
4639                        /*IsInitializer=*/true);
4640   LValue UBLVal = CGF.EmitLValueForField(
4641       Result.TDBase,
4642       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4643   const auto *UBVar =
4644       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4645   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4646                        UBLVal.getQuals(),
4647                        /*IsInitializer=*/true);
4648   LValue StLVal = CGF.EmitLValueForField(
4649       Result.TDBase,
4650       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4651   const auto *StVar =
4652       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4653   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4654                        StLVal.getQuals(),
4655                        /*IsInitializer=*/true);
4656   // Store reductions address.
4657   LValue RedLVal = CGF.EmitLValueForField(
4658       Result.TDBase,
4659       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4660   if (Data.Reductions) {
4661     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4662   } else {
4663     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4664                                CGF.getContext().VoidPtrTy);
4665   }
4666   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4667   llvm::Value *TaskArgs[] = {
4668       UpLoc,
4669       ThreadID,
4670       Result.NewTask,
4671       IfVal,
4672       LBLVal.getPointer(CGF),
4673       UBLVal.getPointer(CGF),
4674       CGF.EmitLoadOfScalar(StLVal, Loc),
4675       llvm::ConstantInt::getSigned(
4676           CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler.
4677       llvm::ConstantInt::getSigned(
4678           CGF.IntTy, Data.Schedule.getPointer()
4679                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4680                          : NoSchedule),
4681       Data.Schedule.getPointer()
4682           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4683                                       /*isSigned=*/false)
4684           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4685       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4686                              Result.TaskDupFn, CGF.VoidPtrTy)
4687                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4688   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4689                           CGM.getModule(), OMPRTL___kmpc_taskloop),
4690                       TaskArgs);
4691 }
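
// Shape of the runtime call emitted above (a sketch; argument names follow
// the signature comment inside the function):
//   __kmpc_taskloop(loc, gtid, task, if_val, &t->lb, &t->ub, st,
//                   /*nogroup=*/1, sched, grainsize_or_num_tasks, task_dup);
//   where sched is 0 (none), 1 (grainsize) or 2 (num_tasks).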
4692 
4693 /// Emit reduction operation for each element of array (required for
4694 /// array sections) LHS op = RHS.
4695 /// \param Type Type of array.
4696 /// \param LHSVar Variable on the left side of the reduction operation
4697 /// (references element of array in original variable).
4698 /// \param RHSVar Variable on the right side of the reduction operation
4699 /// (references element of array in original variable).
4700 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4701 /// RHSVar.
4702 static void EmitOMPAggregateReduction(
4703     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4704     const VarDecl *RHSVar,
4705     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4706                                   const Expr *, const Expr *)> &RedOpGen,
4707     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4708     const Expr *UpExpr = nullptr) {
4709   // Perform the reduction element by element.
4710   QualType ElementTy;
4711   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4712   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4713 
4714   // Drill down to the base element type on both arrays.
4715   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4716   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4717 
4718   llvm::Value *RHSBegin = RHSAddr.getPointer();
4719   llvm::Value *LHSBegin = LHSAddr.getPointer();
4720   // Cast from pointer to array type to pointer to single element.
4721   llvm::Value *LHSEnd =
4722       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4723   // The basic structure here is a while-do loop.
4724   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4725   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4726   llvm::Value *IsEmpty =
4727       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4728   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4729 
4730   // Enter the loop body, making that address the current address.
4731   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4732   CGF.EmitBlock(BodyBB);
4733 
4734   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4735 
4736   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4737       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4738   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4739   Address RHSElementCurrent(
4740       RHSElementPHI, RHSAddr.getElementType(),
4741       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4742 
4743   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4744       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4745   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4746   Address LHSElementCurrent(
4747       LHSElementPHI, LHSAddr.getElementType(),
4748       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749 
4750   // Emit copy.
4751   CodeGenFunction::OMPPrivateScope Scope(CGF);
4752   Scope.addPrivate(LHSVar, LHSElementCurrent);
4753   Scope.addPrivate(RHSVar, RHSElementCurrent);
4754   Scope.Privatize();
4755   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4756   Scope.ForceCleanup();
4757 
4758   // Shift the address forward by one element.
4759   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4760       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4761       "omp.arraycpy.dest.element");
4762   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4763       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4764       "omp.arraycpy.src.element");
4765   // Check whether we've reached the end.
4766   llvm::Value *Done =
4767       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4768   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4769   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4770   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4771 
4772   // Done.
4773   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4774 }
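
// For illustration (hedged sketch): given a clause such as
//   #pragma omp parallel for reduction(+ : a[0:n])
// the loop emitted above is morally equivalent to
//   T *dst = lhs, *src = rhs, *end = lhs + n;
//   while (dst != end) { *dst += *src; ++dst; ++src; }
// where the '+=' is produced by RedOpGen under the privatized LHSVar/RHSVar.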
4775 
4776 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4777 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4778 /// UDR combiner function.
4779 static void emitReductionCombiner(CodeGenFunction &CGF,
4780                                   const Expr *ReductionOp) {
4781   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4782     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4783       if (const auto *DRE =
4784               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4785         if (const auto *DRD =
4786                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4787           std::pair<llvm::Function *, llvm::Function *> Reduction =
4788               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4789           RValue Func = RValue::get(Reduction.first);
4790           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4791           CGF.EmitIgnoredExpr(ReductionOp);
4792           return;
4793         }
4794   CGF.EmitIgnoredExpr(ReductionOp);
4795 }
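
// Example (illustrative): with a user-defined reduction such as
//   #pragma omp declare reduction(merge : T : omp_out.append(omp_in))
// ReductionOp is a CallExpr whose callee is an OpaqueValueExpr referencing
// the OMPDeclareReductionDecl; the mapping above binds that opaque value to
// the emitted combiner function before the call expression is emitted.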
4796 
4797 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4798     StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4799     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4800     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4801   ASTContext &C = CGM.getContext();
4802 
4803   // void reduction_func(void *LHSArg, void *RHSArg);
4804   FunctionArgList Args;
4805   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4806                            ImplicitParamKind::Other);
4807   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4808                            ImplicitParamKind::Other);
4809   Args.push_back(&LHSArg);
4810   Args.push_back(&RHSArg);
4811   const auto &CGFI =
4812       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4813   std::string Name = getReductionFuncName(ReducerName);
4814   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4815                                     llvm::GlobalValue::InternalLinkage, Name,
4816                                     &CGM.getModule());
4817   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4818   Fn->setDoesNotRecurse();
4819   CodeGenFunction CGF(CGM);
4820   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4821 
4822   // Dst = (void*[n])(LHSArg);
4823   // Src = (void*[n])(RHSArg);
4824   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4825                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4826                   ArgsElemType->getPointerTo()),
4827               ArgsElemType, CGF.getPointerAlign());
4828   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4829                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4830                   ArgsElemType->getPointerTo()),
4831               ArgsElemType, CGF.getPointerAlign());
4832 
4833   //  ...
4834   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4835   //  ...
4836   CodeGenFunction::OMPPrivateScope Scope(CGF);
4837   const auto *IPriv = Privates.begin();
4838   unsigned Idx = 0;
4839   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4840     const auto *RHSVar =
4841         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4842     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4843     const auto *LHSVar =
4844         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4845     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4846     QualType PrivTy = (*IPriv)->getType();
4847     if (PrivTy->isVariablyModifiedType()) {
4848       // Get array size and emit VLA type.
4849       ++Idx;
4850       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4851       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4852       const VariableArrayType *VLA =
4853           CGF.getContext().getAsVariableArrayType(PrivTy);
4854       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4855       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4856           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4857       CGF.EmitVariablyModifiedType(PrivTy);
4858     }
4859   }
4860   Scope.Privatize();
4861   IPriv = Privates.begin();
4862   const auto *ILHS = LHSExprs.begin();
4863   const auto *IRHS = RHSExprs.begin();
4864   for (const Expr *E : ReductionOps) {
4865     if ((*IPriv)->getType()->isArrayType()) {
4866       // Emit reduction for array section.
4867       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4868       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4869       EmitOMPAggregateReduction(
4870           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4871           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4872             emitReductionCombiner(CGF, E);
4873           });
4874     } else {
4875       // Emit reduction for array subscript or single variable.
4876       emitReductionCombiner(CGF, E);
4877     }
4878     ++IPriv;
4879     ++ILHS;
4880     ++IRHS;
4881   }
4882   Scope.ForceCleanup();
4883   CGF.FinishFunction();
4884   return Fn;
4885 }
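
// Shape of the generated helper (a sketch; the exact symbol comes from
// getReductionFuncName):
//   define internal void @<reducer>.omp.reduction.reduction_func(ptr %lhs,
//                                                                ptr %rhs) {
//     ; for each reduction item i:
//     ;   *(Type_i *)lhs[i] = RedOp_i(*(Type_i *)lhs[i], *(Type_i *)rhs[i])
//     ret void
//   }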
4886 
4887 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4888                                                   const Expr *ReductionOp,
4889                                                   const Expr *PrivateRef,
4890                                                   const DeclRefExpr *LHS,
4891                                                   const DeclRefExpr *RHS) {
4892   if (PrivateRef->getType()->isArrayType()) {
4893     // Emit reduction for array section.
4894     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4895     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4896     EmitOMPAggregateReduction(
4897         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4898         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4899           emitReductionCombiner(CGF, ReductionOp);
4900         });
4901   } else {
4902     // Emit reduction for array subscript or single variable.
4903     emitReductionCombiner(CGF, ReductionOp);
4904   }
4905 }
4906 
4907 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4908                                     ArrayRef<const Expr *> Privates,
4909                                     ArrayRef<const Expr *> LHSExprs,
4910                                     ArrayRef<const Expr *> RHSExprs,
4911                                     ArrayRef<const Expr *> ReductionOps,
4912                                     ReductionOptionsTy Options) {
4913   if (!CGF.HaveInsertPoint())
4914     return;
4915 
4916   bool WithNowait = Options.WithNowait;
4917   bool SimpleReduction = Options.SimpleReduction;
4918 
4919   // The following code should be emitted for the reduction:
4920   //
4921   // static kmp_critical_name lock = { 0 };
4922   //
4923   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4924   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4925   //  ...
4926   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4927   //  *(Type<n>-1*)rhs[<n>-1]);
4928   // }
4929   //
4930   // ...
4931   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4932   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4933   // RedList, reduce_func, &<lock>)) {
4934   // case 1:
4935   //  ...
4936   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4937   //  ...
4938   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4939   // break;
4940   // case 2:
4941   //  ...
4942   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4943   //  ...
4944   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4945   // break;
4946   // default:;
4947   // }
4948   //
4949   // If SimpleReduction is true, only the following code is generated:
4950   //  ...
4951   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4952   //  ...
4953 
4954   ASTContext &C = CGM.getContext();
4955 
4956   if (SimpleReduction) {
4957     CodeGenFunction::RunCleanupsScope Scope(CGF);
4958     const auto *IPriv = Privates.begin();
4959     const auto *ILHS = LHSExprs.begin();
4960     const auto *IRHS = RHSExprs.begin();
4961     for (const Expr *E : ReductionOps) {
4962       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4963                                   cast<DeclRefExpr>(*IRHS));
4964       ++IPriv;
4965       ++ILHS;
4966       ++IRHS;
4967     }
4968     return;
4969   }
4970 
4971   // 1. Build a list of reduction variables.
4972   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4973   auto Size = RHSExprs.size();
4974   for (const Expr *E : Privates) {
4975     if (E->getType()->isVariablyModifiedType())
4976       // Reserve a slot for the array size.
4977       ++Size;
4978   }
4979   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4980   QualType ReductionArrayTy = C.getConstantArrayType(
4981       C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4982       /*IndexTypeQuals=*/0);
4983   Address ReductionList =
4984       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4985   const auto *IPriv = Privates.begin();
4986   unsigned Idx = 0;
4987   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4988     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4989     CGF.Builder.CreateStore(
4990         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4991             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
4992         Elem);
4993     if ((*IPriv)->getType()->isVariablyModifiedType()) {
4994       // Store array size.
4995       ++Idx;
4996       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4997       llvm::Value *Size = CGF.Builder.CreateIntCast(
4998           CGF.getVLASize(
4999                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5000               .NumElts,
5001           CGF.SizeTy, /*isSigned=*/false);
5002       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5003                               Elem);
5004     }
5005   }
5006 
5007   // 2. Emit reduce_func().
5008   llvm::Function *ReductionFn = emitReductionFunction(
5009       CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5010       Privates, LHSExprs, RHSExprs, ReductionOps);
5011 
5012   // 3. Create static kmp_critical_name lock = { 0 };
5013   std::string Name = getName({"reduction"});
5014   llvm::Value *Lock = getCriticalRegionLock(Name);
5015 
5016   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5017   // RedList, reduce_func, &<lock>);
5018   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5019   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5020   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5021   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5022       ReductionList.getPointer(), CGF.VoidPtrTy);
5023   llvm::Value *Args[] = {
5024       IdentTLoc,                             // ident_t *<loc>
5025       ThreadId,                              // i32 <gtid>
5026       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5027       ReductionArrayTySize,                  // size_type sizeof(RedList)
5028       RL,                                    // void *RedList
5029       ReductionFn, // void (*) (void *, void *) <reduce_func>
5030       Lock         // kmp_critical_name *&<lock>
5031   };
5032   llvm::Value *Res = CGF.EmitRuntimeCall(
5033       OMPBuilder.getOrCreateRuntimeFunction(
5034           CGM.getModule(),
5035           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5036       Args);
5037 
5038   // 5. Build switch(res)
5039   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5040   llvm::SwitchInst *SwInst =
5041       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5042 
5043   // 6. Build case 1:
5044   //  ...
5045   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5046   //  ...
5047   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5048   // break;
5049   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5050   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5051   CGF.EmitBlock(Case1BB);
5052 
5053   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5054   llvm::Value *EndArgs[] = {
5055       IdentTLoc, // ident_t *<loc>
5056       ThreadId,  // i32 <gtid>
5057       Lock       // kmp_critical_name *&<lock>
5058   };
5059   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5060                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5061     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5062     const auto *IPriv = Privates.begin();
5063     const auto *ILHS = LHSExprs.begin();
5064     const auto *IRHS = RHSExprs.begin();
5065     for (const Expr *E : ReductionOps) {
5066       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5067                                      cast<DeclRefExpr>(*IRHS));
5068       ++IPriv;
5069       ++ILHS;
5070       ++IRHS;
5071     }
5072   };
5073   RegionCodeGenTy RCG(CodeGen);
5074   CommonActionTy Action(
5075       nullptr, std::nullopt,
5076       OMPBuilder.getOrCreateRuntimeFunction(
5077           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5078                                       : OMPRTL___kmpc_end_reduce),
5079       EndArgs);
5080   RCG.setAction(Action);
5081   RCG(CGF);
5082 
5083   CGF.EmitBranch(DefaultBB);
5084 
5085   // 7. Build case 2:
5086   //  ...
5087   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5088   //  ...
5089   // break;
5090   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5091   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5092   CGF.EmitBlock(Case2BB);
5093 
5094   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5095                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5096     const auto *ILHS = LHSExprs.begin();
5097     const auto *IRHS = RHSExprs.begin();
5098     const auto *IPriv = Privates.begin();
5099     for (const Expr *E : ReductionOps) {
5100       const Expr *XExpr = nullptr;
5101       const Expr *EExpr = nullptr;
5102       const Expr *UpExpr = nullptr;
5103       BinaryOperatorKind BO = BO_Comma;
5104       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5105         if (BO->getOpcode() == BO_Assign) {
5106           XExpr = BO->getLHS();
5107           UpExpr = BO->getRHS();
5108         }
5109       }
5110       // Try to emit update expression as a simple atomic.
5111       const Expr *RHSExpr = UpExpr;
5112       if (RHSExpr) {
5113         // Analyze RHS part of the whole expression.
5114         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5115                 RHSExpr->IgnoreParenImpCasts())) {
5116           // If this is a conditional operator, analyze its condition for
5117           // min/max reduction operator.
5118           RHSExpr = ACO->getCond();
5119         }
5120         if (const auto *BORHS =
5121                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5122           EExpr = BORHS->getRHS();
5123           BO = BORHS->getOpcode();
5124         }
5125       }
5126       if (XExpr) {
5127         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5128         auto &&AtomicRedGen = [BO, VD,
5129                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5130                                     const Expr *EExpr, const Expr *UpExpr) {
5131           LValue X = CGF.EmitLValue(XExpr);
5132           RValue E;
5133           if (EExpr)
5134             E = CGF.EmitAnyExpr(EExpr);
5135           CGF.EmitOMPAtomicSimpleUpdateExpr(
5136               X, E, BO, /*IsXLHSInRHSPart=*/true,
5137               llvm::AtomicOrdering::Monotonic, Loc,
5138               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5139                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5140                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5141                 CGF.emitOMPSimpleStore(
5142                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5143                     VD->getType().getNonReferenceType(), Loc);
5144                 PrivateScope.addPrivate(VD, LHSTemp);
5145                 (void)PrivateScope.Privatize();
5146                 return CGF.EmitAnyExpr(UpExpr);
5147               });
5148         };
5149         if ((*IPriv)->getType()->isArrayType()) {
5150           // Emit atomic reduction for array section.
5151           const auto *RHSVar =
5152               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5153           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5154                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5155         } else {
5156           // Emit atomic reduction for array subscript or single variable.
5157           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5158         }
5159       } else {
5160         // Emit as a critical region.
5161         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5162                                      const Expr *, const Expr *) {
5163           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5164           std::string Name = RT.getName({"atomic_reduction"});
5165           RT.emitCriticalRegion(
5166               CGF, Name,
5167               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5168                 Action.Enter(CGF);
5169                 emitReductionCombiner(CGF, E);
5170               },
5171               Loc);
5172         };
5173         if ((*IPriv)->getType()->isArrayType()) {
5174           const auto *LHSVar =
5175               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5176           const auto *RHSVar =
5177               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5178           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5179                                     CritRedGen);
5180         } else {
5181           CritRedGen(CGF, nullptr, nullptr, nullptr);
5182         }
5183       }
5184       ++ILHS;
5185       ++IRHS;
5186       ++IPriv;
5187     }
5188   };
5189   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5190   if (!WithNowait) {
5191     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5192     llvm::Value *EndArgs[] = {
5193         IdentTLoc, // ident_t *<loc>
5194         ThreadId,  // i32 <gtid>
5195         Lock       // kmp_critical_name *&<lock>
5196     };
5197     CommonActionTy Action(nullptr, std::nullopt,
5198                           OMPBuilder.getOrCreateRuntimeFunction(
5199                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5200                           EndArgs);
5201     AtomicRCG.setAction(Action);
5202     AtomicRCG(CGF);
5203   } else {
5204     AtomicRCG(CGF);
5205   }
5206 
5207   CGF.EmitBranch(DefaultBB);
5208   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5209 }
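
// Worked example (illustrative): for reduction(max : x) the combiner is of
// the form 'x = x < x_priv ? x_priv : x'. In the atomic path above, XExpr is
// the assignment LHS and UpExpr the conditional; the condition is inspected
// to recover the '<' opcode so EmitOMPAtomicSimpleUpdateExpr can attempt a
// lock-free min/max update, falling back to the critical-section path
// otherwise.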
5210 
5211 /// Generates a unique name for artificial threadprivate variables.
5212 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5213 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5214                                       const Expr *Ref) {
5215   SmallString<256> Buffer;
5216   llvm::raw_svector_ostream Out(Buffer);
5217   const clang::DeclRefExpr *DE;
5218   const VarDecl *D = ::getBaseDecl(Ref, DE);
5219   if (!D)
5220     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5221   D = D->getCanonicalDecl();
5222   std::string Name = CGM.getOpenMPRuntime().getName(
5223       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5224   Out << Prefix << Name << "_"
5225       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5226   return std::string(Out.str());
5227 }
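
// Example (illustrative): for a local variable 'x' whose begin location has
// raw encoding 1234, generateUniqueName(CGM, "reduction_size", <ref to x>)
// yields "reduction_size.x_1234" (the host runtime joins name parts with
// '.').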
5228 
5229 /// Emits reduction initializer function:
5230 /// \code
5231 /// void @.red_init(void* %arg, void* %orig) {
5232 /// %0 = bitcast void* %arg to <type>*
5233 /// store <type> <init>, <type>* %0
5234 /// ret void
5235 /// }
5236 /// \endcode
5237 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5238                                            SourceLocation Loc,
5239                                            ReductionCodeGen &RCG, unsigned N) {
5240   ASTContext &C = CGM.getContext();
5241   QualType VoidPtrTy = C.VoidPtrTy;
5242   VoidPtrTy.addRestrict();
5243   FunctionArgList Args;
5244   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5245                           ImplicitParamKind::Other);
5246   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5247                               ImplicitParamKind::Other);
5248   Args.emplace_back(&Param);
5249   Args.emplace_back(&ParamOrig);
5250   const auto &FnInfo =
5251       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5252   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5253   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5254   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5255                                     Name, &CGM.getModule());
5256   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5257   Fn->setDoesNotRecurse();
5258   CodeGenFunction CGF(CGM);
5259   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5260   QualType PrivateType = RCG.getPrivateType(N);
5261   Address PrivateAddr = CGF.EmitLoadOfPointer(
5262       CGF.GetAddrOfLocalVar(&Param).withElementType(
5263           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5264       C.getPointerType(PrivateType)->castAs<PointerType>());
5265   llvm::Value *Size = nullptr;
5266   // If the size of the reduction item is non-constant, load it from the global
5267   // threadprivate variable.
5268   if (RCG.getSizes(N).second) {
5269     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5270         CGF, CGM.getContext().getSizeType(),
5271         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5272     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5273                                 CGM.getContext().getSizeType(), Loc);
5274   }
5275   RCG.emitAggregateType(CGF, N, Size);
5276   Address OrigAddr = Address::invalid();
5277   // If the initializer uses the initializer from the declare reduction
5278   // construct, emit a pointer to the address of the original reduction item
5279   // (required by the reduction initializer).
5280   if (RCG.usesReductionInitializer(N)) {
5281     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5282     OrigAddr = CGF.EmitLoadOfPointer(
5283         SharedAddr,
5284         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5285   }
5286   // Emit the initializer:
5287   // %0 = bitcast void* %arg to <type>*
5288   // store <type> <init>, <type>* %0
5289   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5290                          [](CodeGenFunction &) { return false; });
5291   CGF.FinishFunction();
5292   return Fn;
5293 }
5294 
5295 /// Emits reduction combiner function:
5296 /// \code
5297 /// void @.red_comb(void* %arg0, void* %arg1) {
5298 /// %lhs = bitcast void* %arg0 to <type>*
5299 /// %rhs = bitcast void* %arg1 to <type>*
5300 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5301 /// store <type> %2, <type>* %lhs
5302 /// ret void
5303 /// }
5304 /// \endcode
5305 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5306                                            SourceLocation Loc,
5307                                            ReductionCodeGen &RCG, unsigned N,
5308                                            const Expr *ReductionOp,
5309                                            const Expr *LHS, const Expr *RHS,
5310                                            const Expr *PrivateRef) {
5311   ASTContext &C = CGM.getContext();
5312   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5313   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5314   FunctionArgList Args;
5315   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5316                                C.VoidPtrTy, ImplicitParamKind::Other);
5317   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5318                             ImplicitParamKind::Other);
5319   Args.emplace_back(&ParamInOut);
5320   Args.emplace_back(&ParamIn);
5321   const auto &FnInfo =
5322       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5323   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5324   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5325   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5326                                     Name, &CGM.getModule());
5327   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5328   Fn->setDoesNotRecurse();
5329   CodeGenFunction CGF(CGM);
5330   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5331   llvm::Value *Size = nullptr;
5332   // If the size of the reduction item is non-constant, load it from the global
5333   // threadprivate variable.
5334   if (RCG.getSizes(N).second) {
5335     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5336         CGF, CGM.getContext().getSizeType(),
5337         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5338     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5339                                 CGM.getContext().getSizeType(), Loc);
5340   }
5341   RCG.emitAggregateType(CGF, N, Size);
5342   // Remap lhs and rhs variables to the addresses of the function arguments.
5343   // %lhs = bitcast void* %arg0 to <type>*
5344   // %rhs = bitcast void* %arg1 to <type>*
5345   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5346   PrivateScope.addPrivate(
5347       LHSVD,
5348       // Pull out the pointer to the variable.
5349       CGF.EmitLoadOfPointer(
5350           CGF.GetAddrOfLocalVar(&ParamInOut)
5351               .withElementType(
5352                   CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5353           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5354   PrivateScope.addPrivate(
5355       RHSVD,
5356       // Pull out the pointer to the variable.
5357       CGF.EmitLoadOfPointer(
5358           CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5359               CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5360           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5361   PrivateScope.Privatize();
5362   // Emit the combiner body:
5363   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5364   // store <type> %2, <type>* %lhs
5365   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5366       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5367       cast<DeclRefExpr>(RHS));
5368   CGF.FinishFunction();
5369   return Fn;
5370 }
5371 
5372 /// Emits reduction finalizer function:
5373 /// \code
5374 /// void @.red_fini(void* %arg) {
5375 /// %0 = bitcast void* %arg to <type>*
5376 /// <destroy>(<type>* %0)
5377 /// ret void
5378 /// }
5379 /// \endcode
5380 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5381                                            SourceLocation Loc,
5382                                            ReductionCodeGen &RCG, unsigned N) {
5383   if (!RCG.needCleanups(N))
5384     return nullptr;
5385   ASTContext &C = CGM.getContext();
5386   FunctionArgList Args;
5387   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5388                           ImplicitParamKind::Other);
5389   Args.emplace_back(&Param);
5390   const auto &FnInfo =
5391       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5392   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5393   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5394   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5395                                     Name, &CGM.getModule());
5396   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5397   Fn->setDoesNotRecurse();
5398   CodeGenFunction CGF(CGM);
5399   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5400   Address PrivateAddr = CGF.EmitLoadOfPointer(
5401       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5402   llvm::Value *Size = nullptr;
5403   // If the size of the reduction item is non-constant, load it from the global
5404   // threadprivate variable.
5405   if (RCG.getSizes(N).second) {
5406     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5407         CGF, CGM.getContext().getSizeType(),
5408         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5409     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5410                                 CGM.getContext().getSizeType(), Loc);
5411   }
5412   RCG.emitAggregateType(CGF, N, Size);
5413   // Emit the finalizer body:
5414   // <destroy>(<type>* %0)
5415   RCG.emitCleanups(CGF, N, PrivateAddr);
5416   CGF.FinishFunction(Loc);
5417   return Fn;
5418 }
5419 
5420 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5421     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5422     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5423   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5424     return nullptr;
5425 
5426   // Build typedef struct:
5427   // kmp_taskred_input {
5428   //   void *reduce_shar; // shared reduction item
5429   //   void *reduce_orig; // original reduction item used for initialization
5430   //   size_t reduce_size; // size of data item
5431   //   void *reduce_init; // data initialization routine
5432   //   void *reduce_fini; // data finalization routine
5433   //   void *reduce_comb; // data combiner routine
5434   //   kmp_taskred_flags_t flags; // flags for additional info from compiler
5435   // } kmp_taskred_input_t;
5436   ASTContext &C = CGM.getContext();
5437   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5438   RD->startDefinition();
5439   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5440   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5441   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5442   const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5443   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5444   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5445   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5446       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5447   RD->completeDefinition();
5448   QualType RDType = C.getRecordType(RD);
5449   unsigned Size = Data.ReductionVars.size();
5450   llvm::APInt ArraySize(/*numBits=*/64, Size);
5451   QualType ArrayRDType =
5452       C.getConstantArrayType(RDType, ArraySize, nullptr,
5453                              ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5454   // kmp_taskred_input_t .rd_input.[Size];
5455   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5456   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5457                        Data.ReductionCopies, Data.ReductionOps);
5458   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5459     // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5460     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5461                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5462     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5463         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5464         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5465         ".rd_input.gep.");
5466     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5467     // ElemLVal.reduce_shar = &Shareds[Cnt];
5468     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5469     RCG.emitSharedOrigLValue(CGF, Cnt);
5470     llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5471     CGF.EmitStoreOfScalar(Shared, SharedLVal);
5472     // ElemLVal.reduce_orig = &Origs[Cnt];
5473     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5474     llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5475     CGF.EmitStoreOfScalar(Orig, OrigLVal);
5476     RCG.emitAggregateType(CGF, Cnt);
5477     llvm::Value *SizeValInChars;
5478     llvm::Value *SizeVal;
5479     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5480     // We use delayed creation/initialization for VLAs and array sections. It is
5481     // required because the runtime does not provide a way to pass the sizes of
5482     // VLAs/array sections to the initializer/combiner/finalizer functions.
5483     // Instead, threadprivate global variables are used to store these values
5484     // and make them available in those functions.
5485     bool DelayedCreation = !!SizeVal;
5486     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5487                                                /*isSigned=*/false);
5488     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5489     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5490     // ElemLVal.reduce_init = init;
5491     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5492     llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5493     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5494     // ElemLVal.reduce_fini = fini;
5495     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5496     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5497     llvm::Value *FiniAddr =
5498         Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5499     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5500     // ElemLVal.reduce_comb = comb;
5501     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5502     llvm::Value *CombAddr = emitReduceCombFunction(
5503         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5504         RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5505     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5506     // ElemLVal.flags = 0;
5507     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5508     if (DelayedCreation) {
5509       CGF.EmitStoreOfScalar(
5510           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5511           FlagsLVal);
5512     } else
5513       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5514                                  FlagsLVal.getType());
5515   }
5516   if (Data.IsReductionWithTaskMod) {
5517     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5518     // is_ws, int num, void *data);
5519     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5520     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5521                                                   CGM.IntTy, /*isSigned=*/true);
5522     llvm::Value *Args[] = {
5523         IdentTLoc, GTid,
5524         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5525                                /*isSigned=*/true),
5526         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5527         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5528             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5529     return CGF.EmitRuntimeCall(
5530         OMPBuilder.getOrCreateRuntimeFunction(
5531             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5532         Args);
5533   }
5534   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5535   llvm::Value *Args[] = {
5536       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5537                                 /*isSigned=*/true),
5538       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5539       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5540                                                       CGM.VoidPtrTy)};
5541   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5542                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
5543                              Args);
5544 }
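
// Usage sketch (illustrative): for
//   #pragma omp taskgroup task_reduction(+ : x)
// this builds a one-element kmp_taskred_input_t array describing x (shared
// and original addresses, size, init/fini/comb thunks, flags) and returns
// the taskgroup handle from
//   __kmpc_taskred_init(gtid, /*num_data=*/1, .rd_input.)
// or from __kmpc_taskred_modifier_init when a reduction modifier is present.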
5545 
5546 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5547                                             SourceLocation Loc,
5548                                             bool IsWorksharingReduction) {
5549   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5550   // int is_ws);
5551   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5552   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5553                                                 CGM.IntTy, /*isSigned=*/true);
5554   llvm::Value *Args[] = {IdentTLoc, GTid,
5555                          llvm::ConstantInt::get(CGM.IntTy,
5556                                                 IsWorksharingReduction ? 1 : 0,
5557                                                 /*isSigned=*/true)};
5558   (void)CGF.EmitRuntimeCall(
5559       OMPBuilder.getOrCreateRuntimeFunction(
5560           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5561       Args);
5562 }
5563 
5564 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5565                                               SourceLocation Loc,
5566                                               ReductionCodeGen &RCG,
5567                                               unsigned N) {
5568   auto Sizes = RCG.getSizes(N);
5569   // Emit the threadprivate global variable if the size is non-constant
5570   // (Sizes.second != nullptr).
5571   if (Sizes.second) {
5572     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5573                                                      /*isSigned=*/false);
5574     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5575         CGF, CGM.getContext().getSizeType(),
5576         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5577     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5578   }
5579 }
5580 
5581 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5582                                               SourceLocation Loc,
5583                                               llvm::Value *ReductionsPtr,
5584                                               LValue SharedLVal) {
5585   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5586   // *d);
5587   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5588                                                    CGM.IntTy,
5589                                                    /*isSigned=*/true),
5590                          ReductionsPtr,
5591                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5592                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5593   return Address(
5594       CGF.EmitRuntimeCall(
5595           OMPBuilder.getOrCreateRuntimeFunction(
5596               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5597           Args),
5598       CGF.Int8Ty, SharedLVal.getAlignment());
5599 }
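
// Illustrative use: inside a task with in_reduction(+ : x), the thread's
// private copy of x is obtained as
//   __kmpc_task_reduction_get_th_data(gtid, tg, &x)
// where 'tg' is the handle returned by emitTaskReductionInit above.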
5600 
5601 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5602                                        const OMPTaskDataTy &Data) {
5603   if (!CGF.HaveInsertPoint())
5604     return;
5605 
5606   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5607     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5608     OMPBuilder.createTaskwait(CGF.Builder);
5609   } else {
5610     llvm::Value *ThreadID = getThreadID(CGF, Loc);
5611     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5612     auto &M = CGM.getModule();
5613     Address DependenciesArray = Address::invalid();
5614     llvm::Value *NumOfElements;
5615     std::tie(NumOfElements, DependenciesArray) =
5616         emitDependClause(CGF, Data.Dependences, Loc);
5617     if (!Data.Dependences.empty()) {
5618       llvm::Value *DepWaitTaskArgs[7];
5619       DepWaitTaskArgs[0] = UpLoc;
5620       DepWaitTaskArgs[1] = ThreadID;
5621       DepWaitTaskArgs[2] = NumOfElements;
5622       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5623       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5624       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5625       DepWaitTaskArgs[6] =
5626           llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5627 
5628       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5629 
5630       // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5631       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5632       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5633       // kmp_int32 has_no_wait); emitted when dependence info is specified.
5634       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5635                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
5636                           DepWaitTaskArgs);
5637 
5638     } else {
5639 
5640       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5641       // global_tid);
5642       llvm::Value *Args[] = {UpLoc, ThreadID};
5643       // Ignore return result until untied tasks are supported.
5644       CGF.EmitRuntimeCall(
5645           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5646           Args);
5647     }
5648   }
5649 
5650   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5651     Region->emitUntiedSwitch(CGF);
5652 }
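
// Illustrative lowering (sketch): '#pragma omp taskwait depend(in : x)'
// takes the dependence branch and calls __kmpc_omp_taskwait_deps_51 with the
// materialized dependence array, while a plain '#pragma omp taskwait' emits
// either the OpenMPIRBuilder taskwait or __kmpc_omp_taskwait(loc, gtid).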
5653 
5654 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5655                                            OpenMPDirectiveKind InnerKind,
5656                                            const RegionCodeGenTy &CodeGen,
5657                                            bool HasCancel) {
5658   if (!CGF.HaveInsertPoint())
5659     return;
5660   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5661                                  InnerKind != OMPD_critical &&
5662                                      InnerKind != OMPD_master &&
5663                                      InnerKind != OMPD_masked);
5664   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5665 }
5666 
5667 namespace {
5668 enum RTCancelKind {
5669   CancelNoreq = 0,
5670   CancelParallel = 1,
5671   CancelLoop = 2,
5672   CancelSections = 3,
5673   CancelTaskgroup = 4
5674 };
5675 } // anonymous namespace
5676 
5677 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5678   RTCancelKind CancelKind = CancelNoreq;
5679   if (CancelRegion == OMPD_parallel)
5680     CancelKind = CancelParallel;
5681   else if (CancelRegion == OMPD_for)
5682     CancelKind = CancelLoop;
5683   else if (CancelRegion == OMPD_sections)
5684     CancelKind = CancelSections;
5685   else {
5686     assert(CancelRegion == OMPD_taskgroup);
5687     CancelKind = CancelTaskgroup;
5688   }
5689   return CancelKind;
5690 }
5691 
5692 void CGOpenMPRuntime::emitCancellationPointCall(
5693     CodeGenFunction &CGF, SourceLocation Loc,
5694     OpenMPDirectiveKind CancelRegion) {
5695   if (!CGF.HaveInsertPoint())
5696     return;
5697   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5698   // global_tid, kmp_int32 cncl_kind);
5699   if (auto *OMPRegionInfo =
5700           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5701     // For 'cancellation point taskgroup', the task region info may not have a
5702     // cancel; the cancel may instead occur in an adjacent task region.
5703     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5704       llvm::Value *Args[] = {
5705           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5706           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5707       // Check the result; exit the construct if cancellation was requested.
5708       llvm::Value *Result = CGF.EmitRuntimeCall(
5709           OMPBuilder.getOrCreateRuntimeFunction(
5710               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5711           Args);
5712       // if (__kmpc_cancellationpoint()) {
5713       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5714       //   exit from construct;
5715       // }
5716       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5717       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5718       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5719       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5720       CGF.EmitBlock(ExitBB);
5721       if (CancelRegion == OMPD_parallel)
5722         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5723       // exit from construct;
5724       CodeGenFunction::JumpDest CancelDest =
5725           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5726       CGF.EmitBranchThroughCleanup(CancelDest);
5727       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5728     }
5729   }
5730 }
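
// Illustrative lowering (sketch): '#pragma omp cancellation point sections'
// becomes roughly
//   %r = call i32 @__kmpc_cancellationpoint(loc, gtid, /*CancelSections=*/3)
//   br i1 (%r != 0), label %.cancel.exit, label %.cancel.continue
// where the exit block branches through cleanups to the construct's cancel
// destination (with an extra cancel barrier for parallel regions).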
5731 
5732 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5733                                      const Expr *IfCond,
5734                                      OpenMPDirectiveKind CancelRegion) {
5735   if (!CGF.HaveInsertPoint())
5736     return;
5737   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5738   // kmp_int32 cncl_kind);
5739   auto &M = CGM.getModule();
5740   if (auto *OMPRegionInfo =
5741           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5742     auto &&ThenGen = [this, &M, Loc, CancelRegion,
5743                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5744       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5745       llvm::Value *Args[] = {
5746           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5747           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5748       // Check the result; exit the construct if cancellation was requested.
5749       llvm::Value *Result = CGF.EmitRuntimeCall(
5750           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5751       // if (__kmpc_cancel()) {
5752       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5753       //   exit from construct;
5754       // }
5755       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5756       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5757       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5758       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5759       CGF.EmitBlock(ExitBB);
5760       if (CancelRegion == OMPD_parallel)
5761         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5762       // exit from construct;
5763       CodeGenFunction::JumpDest CancelDest =
5764           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5765       CGF.EmitBranchThroughCleanup(CancelDest);
5766       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5767     };
5768     if (IfCond) {
5769       emitIfClause(CGF, IfCond, ThenGen,
5770                    [](CodeGenFunction &, PrePostActionTy &) {});
5771     } else {
5772       RegionCodeGenTy ThenRCG(ThenGen);
5773       ThenRCG(CGF);
5774     }
5775   }
5776 }
5777 
5778 namespace {
5779 /// Cleanup action for uses_allocators support.
5780 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5781   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5782 
5783 public:
5784   OMPUsesAllocatorsActionTy(
5785       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5786       : Allocators(Allocators) {}
5787   void Enter(CodeGenFunction &CGF) override {
5788     if (!CGF.HaveInsertPoint())
5789       return;
5790     for (const auto &AllocatorData : Allocators) {
5791       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5792           CGF, AllocatorData.first, AllocatorData.second);
5793     }
5794   }
5795   void Exit(CodeGenFunction &CGF) override {
5796     if (!CGF.HaveInsertPoint())
5797       return;
5798     for (const auto &AllocatorData : Allocators) {
5799       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5800                                                         AllocatorData.first);
5801     }
5802   }
5803 };
5804 } // namespace
5805 
5806 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5807     const OMPExecutableDirective &D, StringRef ParentName,
5808     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5809     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5810   assert(!ParentName.empty() && "Invalid target entry parent name!");
5811   HasEmittedTargetRegion = true;
5812   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5813   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5814     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5815       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5816       if (!D.AllocatorTraits)
5817         continue;
5818       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5819     }
5820   }
5821   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5822   CodeGen.setAction(UsesAllocatorAction);
5823   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5824                                    IsOffloadEntry, CodeGen);
5825 }
5826 
5827 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5828                                              const Expr *Allocator,
5829                                              const Expr *AllocatorTraits) {
5830   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5831   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5832   // Use default memspace handle.
5833   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5834   llvm::Value *NumTraits = llvm::ConstantInt::get(
5835       CGF.IntTy, cast<ConstantArrayType>(
5836                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5837                      ->getSize()
5838                      .getLimitedValue());
5839   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5840   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5841       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5842   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5843                                            AllocatorTraitsLVal.getBaseInfo(),
5844                                            AllocatorTraitsLVal.getTBAAInfo());
5845   llvm::Value *Traits = Addr.getPointer();
5846 
5847   llvm::Value *AllocatorVal =
5848       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5849                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
5850                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
5851   // Store to allocator.
5852   CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5853       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5854   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5855   AllocatorVal =
5856       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5857                                Allocator->getType(), Allocator->getExprLoc());
5858   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5859 }
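
// Illustrative mapping (sketch): for 'uses_allocators(my_alloc(my_traits))'
// where my_traits is an omp_alloctrait_t array of N elements, this emits
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/NULL, N, &my_traits)
// with the result converted to the allocator's handle type before the store.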
5860 
5861 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5862                                              const Expr *Allocator) {
5863   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5864   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5865   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5866   llvm::Value *AllocatorVal =
5867       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5868   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5869                                           CGF.getContext().VoidPtrTy,
5870                                           Allocator->getExprLoc());
5871   (void)CGF.EmitRuntimeCall(
5872       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5873                                             OMPRTL___kmpc_destroy_allocator),
5874       {ThreadId, AllocatorVal});
5875 }
5876 
5877 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5878     const OMPExecutableDirective &D, CodeGenFunction &CGF,
5879     int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5880     int32_t &MaxTeamsVal) {
5881 
5882   getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5883   getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5884                                       /*UpperBoundOnly=*/true);
5885 
5886   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5887     for (auto *A : C->getAttrs()) {
5888       int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5889       int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5890       if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5891         CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5892                                        &AttrMinBlocksVal, &AttrMaxBlocksVal);
5893       else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5894         CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5895             nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5896             &AttrMaxThreadsVal);
5897       else
5898         continue;
5899 
5900       MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5901       if (AttrMaxThreadsVal > 0)
5902         MaxThreadsVal = MaxThreadsVal > 0
5903                             ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5904                             : AttrMaxThreadsVal;
5905       MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5906       if (AttrMaxBlocksVal > 0)
5907         MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5908                                       : AttrMaxBlocksVal;
5909     }
5910   }
5911 }
5912 
5913 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5914     const OMPExecutableDirective &D, StringRef ParentName,
5915     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5916     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5917 
5918   llvm::TargetRegionEntryInfo EntryInfo =
5919       getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5920 
5921   CodeGenFunction CGF(CGM, true);
5922   llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5923       [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5924         const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5925 
5926         CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5927         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5928         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5929       };
5930 
5931   OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5932                                       IsOffloadEntry, OutlinedFn, OutlinedFnID);
5933 
5934   if (!OutlinedFn)
5935     return;
5936 
5937   CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5938 
5939   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5940     for (auto *A : C->getAttrs()) {
5941       if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5942         CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5943     }
5944   }
5945 }
5946 
5947 /// Checks if the expression is constant or does not have non-trivial function
5948 /// calls.
5949 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5950   // We can skip constant expressions.
5951   // We can skip expressions with trivial calls or simple expressions.
5952   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5953           !E->hasNonTrivialCall(Ctx)) &&
5954          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5955 }
5956 
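/// Look through compound statements to find the single meaningful child of
/// \p Body, ignoring trivially skippable statements: evaluatable expressions,
/// asm and null statements, standalone flush/barrier/taskyield directives,
/// and declarations without local runtime effect. Returns nullptr if more
/// than one meaningful child is found.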
5957 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5958                                                     const Stmt *Body) {
5959   const Stmt *Child = Body->IgnoreContainers();
5960   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5961     Child = nullptr;
5962     for (const Stmt *S : C->body()) {
5963       if (const auto *E = dyn_cast<Expr>(S)) {
5964         if (isTrivial(Ctx, E))
5965           continue;
5966       }
5967       // Some of the statements can be ignored.
5968       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5969           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5970         continue;
5971       // Analyze declarations.
5972       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5973         if (llvm::all_of(DS->decls(), [](const Decl *D) {
5974               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5975                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5976                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5977                   isa<UsingDirectiveDecl>(D) ||
5978                   isa<OMPDeclareReductionDecl>(D) ||
5979                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5980                 return true;
5981               const auto *VD = dyn_cast<VarDecl>(D);
5982               if (!VD)
5983                 return false;
5984               return VD->hasGlobalStorage() || !VD->isUsed();
5985             }))
5986           continue;
5987       }
5988       // Found multiple children - cannot get the one child only.
5989       if (Child)
5990         return nullptr;
5991       Child = S;
5992     }
5993     if (Child)
5994       Child = Child->IgnoreContainers();
5995   }
5996   return Child;
5997 }
5998 
5999 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6000     CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6001     int32_t &MaxTeamsVal) {
6002 
6003   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6004   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6005          "Expected target-based executable directive.");
6006   switch (DirectiveKind) {
6007   case OMPD_target: {
6008     const auto *CS = D.getInnermostCapturedStmt();
6009     const auto *Body =
6010         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6011     const Stmt *ChildStmt =
6012         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6013     if (const auto *NestedDir =
6014             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6015       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6016         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6017           const Expr *NumTeams =
6018               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6019           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6020             if (auto Constant =
6021                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6022               MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6023           return NumTeams;
6024         }
6025         MinTeamsVal = MaxTeamsVal = 0;
6026         return nullptr;
6027       }
6028       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6029           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6030         MinTeamsVal = MaxTeamsVal = 1;
6031         return nullptr;
6032       }
6033       MinTeamsVal = MaxTeamsVal = 1;
6034       return nullptr;
6035     }
6036     // A value of -1 is used to check whether no teams region needs to be emitted.
6037     MinTeamsVal = MaxTeamsVal = -1;
6038     return nullptr;
6039   }
6040   case OMPD_target_teams_loop:
6041   case OMPD_target_teams:
6042   case OMPD_target_teams_distribute:
6043   case OMPD_target_teams_distribute_simd:
6044   case OMPD_target_teams_distribute_parallel_for:
6045   case OMPD_target_teams_distribute_parallel_for_simd: {
6046     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6047       const Expr *NumTeams =
6048           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6049       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6050         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6051           MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6052       return NumTeams;
6053     }
6054     MinTeamsVal = MaxTeamsVal = 0;
6055     return nullptr;
6056   }
6057   case OMPD_target_parallel:
6058   case OMPD_target_parallel_for:
6059   case OMPD_target_parallel_for_simd:
6060   case OMPD_target_parallel_loop:
6061   case OMPD_target_simd:
6062     MinTeamsVal = MaxTeamsVal = 1;
6063     return nullptr;
6064   case OMPD_parallel:
6065   case OMPD_for:
6066   case OMPD_parallel_for:
6067   case OMPD_parallel_loop:
6068   case OMPD_parallel_master:
6069   case OMPD_parallel_sections:
6070   case OMPD_for_simd:
6071   case OMPD_parallel_for_simd:
6072   case OMPD_cancel:
6073   case OMPD_cancellation_point:
6074   case OMPD_ordered:
6075   case OMPD_threadprivate:
6076   case OMPD_allocate:
6077   case OMPD_task:
6078   case OMPD_simd:
6079   case OMPD_tile:
6080   case OMPD_unroll:
6081   case OMPD_sections:
6082   case OMPD_section:
6083   case OMPD_single:
6084   case OMPD_master:
6085   case OMPD_critical:
6086   case OMPD_taskyield:
6087   case OMPD_barrier:
6088   case OMPD_taskwait:
6089   case OMPD_taskgroup:
6090   case OMPD_atomic:
6091   case OMPD_flush:
6092   case OMPD_depobj:
6093   case OMPD_scan:
6094   case OMPD_teams:
6095   case OMPD_target_data:
6096   case OMPD_target_exit_data:
6097   case OMPD_target_enter_data:
6098   case OMPD_distribute:
6099   case OMPD_distribute_simd:
6100   case OMPD_distribute_parallel_for:
6101   case OMPD_distribute_parallel_for_simd:
6102   case OMPD_teams_distribute:
6103   case OMPD_teams_distribute_simd:
6104   case OMPD_teams_distribute_parallel_for:
6105   case OMPD_teams_distribute_parallel_for_simd:
6106   case OMPD_target_update:
6107   case OMPD_declare_simd:
6108   case OMPD_declare_variant:
6109   case OMPD_begin_declare_variant:
6110   case OMPD_end_declare_variant:
6111   case OMPD_declare_target:
6112   case OMPD_end_declare_target:
6113   case OMPD_declare_reduction:
6114   case OMPD_declare_mapper:
6115   case OMPD_taskloop:
6116   case OMPD_taskloop_simd:
6117   case OMPD_master_taskloop:
6118   case OMPD_master_taskloop_simd:
6119   case OMPD_parallel_master_taskloop:
6120   case OMPD_parallel_master_taskloop_simd:
6121   case OMPD_requires:
6122   case OMPD_metadirective:
6123   case OMPD_unknown:
6124     break;
6125   default:
6126     break;
6127   }
6128   llvm_unreachable("Unexpected directive kind.");
6129 }
6130 
6131 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6132     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6133   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6134          "Clauses associated with the teams directive expected to be emitted "
6135          "only for the host!");
6136   CGBuilderTy &Bld = CGF.Builder;
6137   int32_t MinNT = -1, MaxNT = -1;
6138   const Expr *NumTeams =
6139       getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6140   if (NumTeams != nullptr) {
6141     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6142 
6143     switch (DirectiveKind) {
6144     case OMPD_target: {
6145       const auto *CS = D.getInnermostCapturedStmt();
6146       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6147       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6148       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6149                                                   /*IgnoreResultAssign*/ true);
6150       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6151                              /*isSigned=*/true);
6152     }
6153     case OMPD_target_teams:
6154     case OMPD_target_teams_distribute:
6155     case OMPD_target_teams_distribute_simd:
6156     case OMPD_target_teams_distribute_parallel_for:
6157     case OMPD_target_teams_distribute_parallel_for_simd: {
6158       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6159       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6160                                                   /*IgnoreResultAssign*/ true);
6161       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6162                              /*isSigned=*/true);
6163     }
6164     default:
6165       break;
6166     }
6167   }
6168 
6169   assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6170   return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6171 }
6172 
6173 /// Check for a num threads constant value (stored in \p UpperBound), or an
6174 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6175 /// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6176 /// corresponding expression evaluation is not performed.
6177 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6178                           const Expr **E, int32_t &UpperBound,
6179                           bool UpperBoundOnly, llvm::Value **CondVal) {
6180   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6181       CGF.getContext(), CS->getCapturedStmt());
6182   const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6183   if (!Dir)
6184     return;
6185 
6186   if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6187     // Handle the if clause. If an if clause is present, the number of threads
6188     // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
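    // For example, for '#pragma omp parallel if(c) num_threads(n)', a false
    // condition forces a single thread, while a true condition defers to
    // num_threads (or to the runtime default, 0, when none is given).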
6189     if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6190       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6191       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6192       const OMPIfClause *IfClause = nullptr;
6193       for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6194         if (C->getNameModifier() == OMPD_unknown ||
6195             C->getNameModifier() == OMPD_parallel) {
6196           IfClause = C;
6197           break;
6198         }
6199       }
6200       if (IfClause) {
6201         const Expr *CondExpr = IfClause->getCondition();
6202         bool Result;
6203         if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6204           if (!Result) {
6205             UpperBound = 1;
6206             return;
6207           }
6208         } else {
6209           CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6210           if (const auto *PreInit =
6211                   cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6212             for (const auto *I : PreInit->decls()) {
6213               if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6214                 CGF.EmitVarDecl(cast<VarDecl>(*I));
6215               } else {
6216                 CodeGenFunction::AutoVarEmission Emission =
6217                     CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6218                 CGF.EmitAutoVarCleanups(Emission);
6219               }
6220             }
6221             *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6222           }
6223         }
6224       }
6225     }
6226     // Check the value of the num_threads clause iff the if clause was not
6227     // specified or does not evaluate to false.
6228     if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6229       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6230       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6231       const auto *NumThreadsClause =
6232           Dir->getSingleClause<OMPNumThreadsClause>();
6233       const Expr *NTExpr = NumThreadsClause->getNumThreads();
6234       if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6235         if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6236           UpperBound =
6237               UpperBound == -1
6238                   ? Constant->getZExtValue()
6239                   : std::min(UpperBound,
6240                              static_cast<int32_t>(Constant->getZExtValue()));
6241       // If we haven't found an upper bound, remember we saw a thread limiting
6242       // clause.
6243       if (UpperBound == -1)
6244         UpperBound = 0;
6245       if (!E)
6246         return;
6247       CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6248       if (const auto *PreInit =
6249               cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6250         for (const auto *I : PreInit->decls()) {
6251           if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6252             CGF.EmitVarDecl(cast<VarDecl>(*I));
6253           } else {
6254             CodeGenFunction::AutoVarEmission Emission =
6255                 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6256             CGF.EmitAutoVarCleanups(Emission);
6257           }
6258         }
6259       }
6260       *E = NTExpr;
6261     }
6262     return;
6263   }
6264   if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6265     UpperBound = 1;
6266 }
6267 
6268 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6269     CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6270     bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6271   assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6272          "Clauses associated with the teams directive expected to be emitted "
6273          "only for the host!");
6274   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6275   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6276          "Expected target-based executable directive.");
6277 
6278   const Expr *NT = nullptr;
6279   const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6280 
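  // Fold a constant thread-limiting expression into UpperBound, note that a
  // limiting clause was seen, and optionally remember the expression in *EPtr.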
6281   auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6282     if (E->isIntegerConstantExpr(CGF.getContext())) {
6283       if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6284         UpperBound = UpperBound == -1 ? Constant->getZExtValue()
6285                                       : std::min(UpperBound,
6286                                                  int32_t(Constant->getZExtValue()));
6287     }
6288     // If we haven't found an upper bound, remember we saw a thread limiting
6289     // clause.
6290     if (UpperBound == -1)
6291       UpperBound = 0;
6292     if (EPtr)
6293       *EPtr = E;
6294   };
6295 
6296   auto ReturnSequential = [&]() {
6297     UpperBound = 1;
6298     return NT;
6299   };
6300 
6301   switch (DirectiveKind) {
6302   case OMPD_target: {
6303     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6304     getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6305     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6306         CGF.getContext(), CS->getCapturedStmt());
6307     // TODO: The standard is not clear how to resolve two thread limit clauses,
6308     //       let's pick the teams one if it's present, otherwise the target one.
6309     const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6310     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6311       if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6312         ThreadLimitClause = TLC;
6313         if (ThreadLimitExpr) {
6314           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6315           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6316           CodeGenFunction::LexicalScope Scope(
6317               CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6318           if (const auto *PreInit =
6319                   cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6320             for (const auto *I : PreInit->decls()) {
6321               if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6322                 CGF.EmitVarDecl(cast<VarDecl>(*I));
6323               } else {
6324                 CodeGenFunction::AutoVarEmission Emission =
6325                     CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6326                 CGF.EmitAutoVarCleanups(Emission);
6327               }
6328             }
6329           }
6330         }
6331       }
6332     }
6333     if (ThreadLimitClause)
6334       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6335     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6336       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6337           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6338         CS = Dir->getInnermostCapturedStmt();
6339         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6340             CGF.getContext(), CS->getCapturedStmt());
6341         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6342       }
6343       if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6344         CS = Dir->getInnermostCapturedStmt();
6345         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6346       } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6347         return ReturnSequential();
6348     }
6349     return NT;
6350   }
6351   case OMPD_target_teams: {
6352     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6353       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6354       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6355       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6356     }
6357     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6358     getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6359     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6360         CGF.getContext(), CS->getCapturedStmt());
6361     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6362       if (Dir->getDirectiveKind() == OMPD_distribute) {
6363         CS = Dir->getInnermostCapturedStmt();
6364         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6365       }
6366     }
6367     return NT;
6368   }
6369   case OMPD_target_teams_distribute:
6370     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6371       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6372       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6373       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6374     }
6375     getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6376                   UpperBoundOnly, CondVal);
6377     return NT;
6378   case OMPD_target_teams_loop:
6379   case OMPD_target_parallel_loop:
6380   case OMPD_target_parallel:
6381   case OMPD_target_parallel_for:
6382   case OMPD_target_parallel_for_simd:
6383   case OMPD_target_teams_distribute_parallel_for:
6384   case OMPD_target_teams_distribute_parallel_for_simd: {
6385     if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6386       const OMPIfClause *IfClause = nullptr;
6387       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6388         if (C->getNameModifier() == OMPD_unknown ||
6389             C->getNameModifier() == OMPD_parallel) {
6390           IfClause = C;
6391           break;
6392         }
6393       }
6394       if (IfClause) {
6395         const Expr *Cond = IfClause->getCondition();
6396         bool Result;
6397         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6398           if (!Result)
6399             return ReturnSequential();
6400         } else {
6401           CodeGenFunction::RunCleanupsScope Scope(CGF);
6402           *CondVal = CGF.EvaluateExprAsBool(Cond);
6403         }
6404       }
6405     }
6406     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6407       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6408       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6409       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6410     }
6411     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6412       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6413       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6414       CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6415       return NumThreadsClause->getNumThreads();
6416     }
6417     return NT;
6418   }
6419   case OMPD_target_teams_distribute_simd:
6420   case OMPD_target_simd:
6421     return ReturnSequential();
6422   default:
6423     break;
6424   }
6425   llvm_unreachable("Unsupported directive kind.");
6426 }
6427 
6428 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6429     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6430   llvm::Value *NumThreadsVal = nullptr;
6431   llvm::Value *CondVal = nullptr;
6432   llvm::Value *ThreadLimitVal = nullptr;
6433   const Expr *ThreadLimitExpr = nullptr;
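  // UpperBound uses -1 for "unknown", 0 for "a thread-limiting clause was seen
  // but no constant bound is known", and a positive constant bound otherwise
  // (1 forces sequential execution).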
6434   int32_t UpperBound = -1;
6435 
6436   const Expr *NT = getNumThreadsExprForTargetDirective(
6437       CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6438       &ThreadLimitExpr);
6439 
6440   // Thread limit expressions are used below, emit them.
6441   if (ThreadLimitExpr) {
6442     ThreadLimitVal =
6443         CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6444     ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6445                                                /*isSigned=*/false);
6446   }
6447 
6448   // Generate the num threads expression.
6449   if (UpperBound == 1) {
6450     NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6451   } else if (NT) {
6452     NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6453     NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6454                                               /*isSigned=*/false);
6455   } else if (ThreadLimitVal) {
6456     // If we do not have a num threads value but a thread limit, replace the
6457     // former with the latter; the thread limit expression was already handled.
6458     NumThreadsVal = ThreadLimitVal;
6459     ThreadLimitVal = nullptr;
6460   } else {
6461     // Default to "0" which means runtime choice.
6462     assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6463     NumThreadsVal = CGF.Builder.getInt32(0);
6464   }
6465 
6466   // Handle the if clause. If an if clause is present, the number of threads
6467   // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6468   if (CondVal) {
6469     CodeGenFunction::RunCleanupsScope Scope(CGF);
6470     NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6471                                              CGF.Builder.getInt32(1));
6472   }
6473 
6474   // If both the thread limit and the num threads expression were present,
6475   // take the minimum.
6476   if (ThreadLimitVal) {
6477     NumThreadsVal = CGF.Builder.CreateSelect(
6478         CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6479         ThreadLimitVal, NumThreadsVal);
6480   }
6481 
6482   return NumThreadsVal;
6483 }
6484 
6485 namespace {
6486 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6487 
6488 // Utility to handle information from clauses associated with a given
6489 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6490 // It provides a convenient interface to obtain the information and generate
6491 // code for that information.
6492 class MappableExprsHandler {
6493 public:
6494   /// Get the offset of the OMP_MAP_MEMBER_OF field.
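  /// The offset is the number of trailing zero bits in the MEMBER_OF mask;
  /// e.g. if MEMBER_OF occupies the high 16 bits of a 64-bit flag word, the
  /// returned offset is 48.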
6495   static unsigned getFlagMemberOffset() {
6496     unsigned Offset = 0;
6497     for (uint64_t Remain =
6498              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6499                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6500          !(Remain & 1); Remain = Remain >> 1)
6501       Offset++;
6502     return Offset;
6503   }
6504 
6505   /// Class that holds debugging information for a data mapping to be passed to
6506   /// the runtime library.
6507   class MappingExprInfo {
6508     /// The variable declaration used for the data mapping.
6509     const ValueDecl *MapDecl = nullptr;
6510     /// The original expression used in the map clause, or null if there is
6511     /// none.
6512     const Expr *MapExpr = nullptr;
6513 
6514   public:
6515     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6516         : MapDecl(MapDecl), MapExpr(MapExpr) {}
6517 
6518     const ValueDecl *getMapDecl() const { return MapDecl; }
6519     const Expr *getMapExpr() const { return MapExpr; }
6520   };
6521 
6522   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6523   using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6524   using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6525   using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6526   using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6527   using MapNonContiguousArrayTy =
6528       llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6529   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6530   using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6531 
6532   /// This structure contains combined information generated for mappable
6533   /// clauses, including base pointers, pointers, sizes, map types, user-defined
6534   /// mappers, and non-contiguous information.
6535   struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6536     MapExprsArrayTy Exprs;
6537     MapValueDeclsArrayTy Mappers;
6538     MapValueDeclsArrayTy DevicePtrDecls;
6539 
6540     /// Append arrays in \a CurInfo.
6541     void append(MapCombinedInfoTy &CurInfo) {
6542       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6543       DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6544                             CurInfo.DevicePtrDecls.end());
6545       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6546       llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6547     }
6548   };
6549 
6550   /// Map between a struct and its lowest & highest elements which have been
6551   /// mapped.
6552   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6553   ///                    HE(FieldIndex, Pointer)}
6554   struct StructRangeInfoTy {
6555     MapCombinedInfoTy PreliminaryMapData;
6556     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6557         0, Address::invalid()};
6558     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6559         0, Address::invalid()};
6560     Address Base = Address::invalid();
6561     Address LB = Address::invalid();
6562     bool IsArraySection = false;
6563     bool HasCompleteRecord = false;
6564   };
6565 
6566 private:
6567   /// Map information for a single mappable-expression component list.
6568   struct MapInfo {
6569     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6570     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6571     ArrayRef<OpenMPMapModifierKind> MapModifiers;
6572     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6573     bool ReturnDevicePointer = false;
6574     bool IsImplicit = false;
6575     const ValueDecl *Mapper = nullptr;
6576     const Expr *VarRef = nullptr;
6577     bool ForDeviceAddr = false;
6578 
6579     MapInfo() = default;
6580     MapInfo(
6581         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6582         OpenMPMapClauseKind MapType,
6583         ArrayRef<OpenMPMapModifierKind> MapModifiers,
6584         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6585         bool ReturnDevicePointer, bool IsImplicit,
6586         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6587         bool ForDeviceAddr = false)
6588         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6589           MotionModifiers(MotionModifiers),
6590           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6591           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6592   };
6593 
6594   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6595   /// member and there is no map information about it, then emission of that
6596   /// entry is deferred until the whole struct has been processed.
6597   struct DeferredDevicePtrEntryTy {
6598     const Expr *IE = nullptr;
6599     const ValueDecl *VD = nullptr;
6600     bool ForDeviceAddr = false;
6601 
6602     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6603                              bool ForDeviceAddr)
6604         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6605   };
6606 
6607   /// The target directive from where the mappable clauses were extracted. It
6608   /// is either an executable directive or a user-defined mapper directive.
6609   llvm::PointerUnion<const OMPExecutableDirective *,
6610                      const OMPDeclareMapperDecl *>
6611       CurDir;
6612 
6613   /// Function the directive is being generated for.
6614   CodeGenFunction &CGF;
6615 
6616   /// Set of all first private variables in the current directive.
6617   /// bool data is set to true if the variable is implicitly marked as
6618   /// firstprivate, false otherwise.
6619   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6620 
6621   /// Map between device pointer declarations and their expression components.
6622   /// The key value for declarations in 'this' is null.
6623   llvm::DenseMap<
6624       const ValueDecl *,
6625       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6626       DevPointersMap;
6627 
6628   /// Map between device addr declarations and their expression components.
6629   /// The key value for declarations in 'this' is null.
6630   llvm::DenseMap<
6631       const ValueDecl *,
6632       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6633       HasDevAddrsMap;
6634 
6635   /// Map between lambda declarations and their map type.
6636   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6637 
6638   llvm::Value *getExprTypeSize(const Expr *E) const {
6639     QualType ExprTy = E->getType().getCanonicalType();
6640 
6641     // Calculate the size for an array shaping expression.
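    // E.g. for '([n][m])p' the size is n * m * sizeof(*p): the pointee size
    // multiplied by each dimension value.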
6642     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6643       llvm::Value *Size =
6644           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6645       for (const Expr *SE : OAE->getDimensions()) {
6646         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6647         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6648                                       CGF.getContext().getSizeType(),
6649                                       SE->getExprLoc());
6650         Size = CGF.Builder.CreateNUWMul(Size, Sz);
6651       }
6652       return Size;
6653     }
6654 
6655     // Reference types are ignored for mapping purposes.
6656     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6657       ExprTy = RefTy->getPointeeType().getCanonicalType();
6658 
6659     // Given that an array section is considered a built-in type, we need to
6660     // do the calculation based on the length of the section instead of relying
6661     // on CGF.getTypeSize(E->getType()).
6662     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6663       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6664                             OAE->getBase()->IgnoreParenImpCasts())
6665                             .getCanonicalType();
6666 
6667       // If there is no length associated with the expression and the lower
6668       // bound is not specified either, we are using the whole length of the
6669       // base.
6670       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6671           !OAE->getLowerBound())
6672         return CGF.getTypeSize(BaseTy);
6673 
6674       llvm::Value *ElemSize;
6675       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6676         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6677       } else {
6678         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6679         assert(ATy && "Expecting array type if not a pointer type.");
6680         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6681       }
6682 
6683       // If we don't have a length at this point, that is because we have an
6684       // array section with a single element.
6685       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6686         return ElemSize;
6687 
6688       if (const Expr *LenExpr = OAE->getLength()) {
6689         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6690         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6691                                              CGF.getContext().getSizeType(),
6692                                              LenExpr->getExprLoc());
6693         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6694       }
6695       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6696              OAE->getLowerBound() && "expected array_section[lb:].");
6697       // Size = max(sizeof(base) - lb * sizeof(elem), 0);
6698       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6699       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6700       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6701                                        CGF.getContext().getSizeType(),
6702                                        OAE->getLowerBound()->getExprLoc());
6703       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6704       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6705       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6706       LengthVal = CGF.Builder.CreateSelect(
6707           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6708       return LengthVal;
6709     }
6710     return CGF.getTypeSize(ExprTy);
6711   }
6712 
6713   /// Return the corresponding bits for a given map clause modifier. Add
6714   /// a flag marking the map as a pointer if requested. Add a flag marking the
6715   /// map as the first one of a series of maps that relate to the same map
6716   /// expression.
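  /// E.g. 'map(always, to: x)' contributes OMP_MAP_TO | OMP_MAP_ALWAYS; the
  /// PTR_AND_OBJ and TARGET_PARAM bits are added on request via \p AddPtrFlag
  /// and \p AddIsTargetParamFlag.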
6717   OpenMPOffloadMappingFlags getMapTypeBits(
6718       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6719       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6720       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6721     OpenMPOffloadMappingFlags Bits =
6722         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6723                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6724     switch (MapType) {
6725     case OMPC_MAP_alloc:
6726     case OMPC_MAP_release:
6727       // alloc and release are the default behavior in the runtime library,
6728       // i.e. if we don't pass any bits, alloc/release is what the runtime is
6729       // going to do. Therefore, we don't need to signal anything for these two
6730       // type modifiers.
6731       break;
6732     case OMPC_MAP_to:
6733       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6734       break;
6735     case OMPC_MAP_from:
6736       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6737       break;
6738     case OMPC_MAP_tofrom:
6739       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6740               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6741       break;
6742     case OMPC_MAP_delete:
6743       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6744       break;
6745     case OMPC_MAP_unknown:
6746       llvm_unreachable("Unexpected map type!");
6747     }
6748     if (AddPtrFlag)
6749       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6750     if (AddIsTargetParamFlag)
6751       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6752     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6753       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6754     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6755       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6756     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6757         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6758       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6759     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6760       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6761     if (IsNonContiguous)
6762       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6763     return Bits;
6764   }
6765 
6766   /// Return true if the provided expression is a final array section. A
6767   /// final array section is one whose length can't be proved to be one.
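  /// E.g. 'a[3:1]' has provably unit length and is not final, while 'a[3:n]'
  /// or 'a[3:]' over a pointer or non-unit array dimension is final.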
6768   bool isFinalArraySectionExpression(const Expr *E) const {
6769     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6770 
6771     // It is not an array section and therefore not a unity-size one.
6772     if (!OASE)
6773       return false;
6774 
6775     // An array section with no colon always refers to a single element.
6776     if (OASE->getColonLocFirst().isInvalid())
6777       return false;
6778 
6779     const Expr *Length = OASE->getLength();
6780 
6781     // If we don't have a length we have to check if the array has size 1
6782     // for this dimension. Also, we should always expect a length if the
6783     // base type is a pointer.
6784     if (!Length) {
6785       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6786                              OASE->getBase()->IgnoreParenImpCasts())
6787                              .getCanonicalType();
6788       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6789         return ATy->getSize().getSExtValue() != 1;
6790       // If we don't have a constant dimension length, we have to consider
6791       // the current section as having any size, so it is not necessarily
6792       // unitary. If it happens to be unity size, that's the user's fault.
6793       return true;
6794     }
6795 
6796     // Check if the length evaluates to 1.
6797     Expr::EvalResult Result;
6798     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6799       return true; // Can have more than size 1.
6800 
6801     llvm::APSInt ConstLength = Result.Val.getInt();
6802     return ConstLength.getSExtValue() != 1;
6803   }
6804 
6805   /// Generate the base pointers, section pointers, sizes, map type bits, and
6806   /// user-defined mappers (all included in \a CombinedInfo) for the provided
6807   /// map type, map or motion modifiers, and expression components.
6808   /// \a IsFirstComponent should be set to true if the provided set of
6809   /// components is the first associated with a capture.
6810   void generateInfoForComponentList(
6811       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6812       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6813       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6814       MapCombinedInfoTy &CombinedInfo,
6815       MapCombinedInfoTy &StructBaseCombinedInfo,
6816       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6817       bool IsImplicit, bool GenerateAllInfoForClauses,
6818       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6819       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6820       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6821           OverlappedElements = std::nullopt) const {
6822     // The following summarizes what has to be generated for each map and the
6823     // types below. The generated information is expressed in this order:
6824     // base pointer, section pointer, size, flags
6825     // (to add to the ones that come from the map type and modifier).
6826     //
6827     // double d;
6828     // int i[100];
6829     // float *p;
6830     // int **a = &i;
6831     //
6832     // struct S1 {
6833     //   int i;
6834     //   float f[50];
6835     // }
6836     // struct S2 {
6837     //   int i;
6838     //   float f[50];
6839     //   S1 s;
6840     //   double *p;
6841     //   struct S2 *ps;
6842     //   int &ref;
6843     // }
6844     // S2 s;
6845     // S2 *ps;
6846     //
6847     // map(d)
6848     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6849     //
6850     // map(i)
6851     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6852     //
6853     // map(i[1:23])
6854     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6855     //
6856     // map(p)
6857     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6858     //
6859     // map(p[1:24])
6860     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6861     // in unified shared memory mode or for local pointers
6862     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6863     //
6864     // map((*a)[0:3])
6865     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6866     // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6867     //
6868     // map(**a)
6869     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6870     // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6871     //
6872     // map(s)
6873     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6874     //
6875     // map(s.i)
6876     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6877     //
6878     // map(s.s.f)
6879     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6880     //
6881     // map(s.p)
6882     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6883     //
6884     // map(to: s.p[:22])
6885     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6886     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6887     // &(s.p), &(s.p[0]), 22*sizeof(double),
6888     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6889     // (*) alloc space for struct members, only this is a target parameter
6890     // (**) map the pointer (nothing to be mapped in this example) (the compiler
6891     //      optimizes this entry out, same in the examples below)
6892     // (***) map the pointee (map: to)
6893     //
6894     // map(to: s.ref)
6895     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6896     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6897     // (*) alloc space for struct members, only this is a target parameter
6898     // (**) map the pointer (nothing to be mapped in this example) (the compiler
6899     //      optimizes this entry out, same in the examples below)
6900     // (***) map the pointee (map: to)
6901     //
6902     // map(s.ps)
6903     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6904     //
6905     // map(from: s.ps->s.i)
6906     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6907     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6908     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
6909     //
6910     // map(to: s.ps->ps)
6911     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6912     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6913     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
6914     //
6915     // map(s.ps->ps->ps)
6916     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6917     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6918     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6919     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6920     //
6921     // map(to: s.ps->ps->s.f[:22])
6922     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6923     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6924     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6925     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6926     //
6927     // map(ps)
6928     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6929     //
6930     // map(ps->i)
6931     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6932     //
6933     // map(ps->s.f)
6934     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6935     //
6936     // map(from: ps->p)
6937     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6938     //
6939     // map(to: ps->p[:22])
6940     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6941     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6942     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6943     //
6944     // map(ps->ps)
6945     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6946     //
6947     // map(from: ps->ps->s.i)
6948     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6949     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6950     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6951     //
6952     // map(from: ps->ps->ps)
6953     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6954     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6955     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6956     //
6957     // map(ps->ps->ps->ps)
6958     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6959     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6960     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6961     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6962     //
6963     // map(to: ps->ps->ps->s.f[:22])
6964     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6965     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6966     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6967     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6968     //
6969     // map(to: s.f[:22]) map(from: s.p[:33])
6970     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6971     //     sizeof(double*) (**), TARGET_PARAM
6972     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6973     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6974     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6975     // (*) allocate contiguous space needed to fit all mapped members even if
6976     //     we allocate space for members not mapped (in this example,
6977     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
6978     //     them as well because they fall between &s.f[0] and &s.p)
6979     //
6980     // map(from: s.f[:22]) map(to: ps->p[:33])
6981     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6982     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6983     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6984     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6985     // (*) the struct this entry pertains to is the 2nd element in the list of
6986     //     arguments, hence MEMBER_OF(2)
6987     //
6988     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6989     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6990     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
6991     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
6992     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6993     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
6994     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
6995     // (*) the struct this entry pertains to is the 4th element in the list
6996     //     of arguments, hence MEMBER_OF(4)
6997 
6998     // Track if the map information being generated is the first for a capture.
6999     bool IsCaptureFirstInfo = IsFirstComponentList;
7000     // When the variable is on a declare target link or in a to clause with
7001     // unified memory, a reference is needed to hold the host/device address
7002     // of the variable.
7003     bool RequiresReference = false;
7004 
7005     // Scan the components from the base to the complete expression.
7006     auto CI = Components.rbegin();
7007     auto CE = Components.rend();
7008     auto I = CI;
7009 
7010     // Track if the map information being generated is the first for a list of
7011     // components.
7012     bool IsExpressionFirstInfo = true;
7013     bool FirstPointerInComplexData = false;
7014     Address BP = Address::invalid();
7015     const Expr *AssocExpr = I->getAssociatedExpression();
7016     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7017     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7018     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7019 
7020     if (isa<MemberExpr>(AssocExpr)) {
7021       // The base is the 'this' pointer. The content of the pointer is going
7022       // to be the base of the field being mapped.
7023       BP = CGF.LoadCXXThisAddress();
7024     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7025                (OASE &&
7026                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7027       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7028     } else if (OAShE &&
7029                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7030       BP = Address(
7031           CGF.EmitScalarExpr(OAShE->getBase()),
7032           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7033           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7034     } else {
7035       // The base is the reference to the variable.
7036       // BP = &Var.
7037       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7038       if (const auto *VD =
7039               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7040         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7041                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7042           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7043               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7044                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7045                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7046             RequiresReference = true;
7047             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7048           }
7049         }
7050       }
7051 
7052       // If the variable is a pointer and is being dereferenced (i.e. is not
7053       // the last component), the base has to be the pointer itself, not its
7054       // reference. References are ignored for mapping purposes.
7055       QualType Ty =
7056           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7057       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7058         // No need to generate individual map information for the pointer, it
7059         // can be associated with the combined storage if shared memory mode is
7060         // active or the base declaration is not a global variable.
7061         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7062         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7063             !VD || VD->hasLocalStorage())
7064           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7065         else
7066           FirstPointerInComplexData = true;
7067         ++I;
7068       }
7069     }
7070 
7071     // Track whether a component of the list should be marked as MEMBER_OF some
7072     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7073     // in a component list should be marked as MEMBER_OF, all subsequent entries
7074     // do not belong to the base struct. E.g.
7075     // struct S2 s;
7076     // s.ps->ps->ps->f[:]
7077     //   (1) (2) (3) (4)
7078     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7079     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7080     // is the pointee of ps(2), which is not a member of struct s, so it
7081     // should not be marked as such (it is still PTR_AND_OBJ).
7082     // The variable is initialized to false so that PTR_AND_OBJ entries which
7083     // are not struct members are not considered (e.g. array of pointers to
7084     // data).
7085     bool ShouldBeMemberOf = false;
7086 
7087     // Variable keeping track of whether or not we have encountered a component
7088     // in the component list which is a member expression. Useful when we have a
7089     // pointer or a final array section, in which case it is the previous
7090     // component in the list which tells us whether we have a member expression.
7091     // E.g. X.f[:]
7092     // While processing the final array section "[:]" it is "f" which tells us
7093     // whether we are dealing with a member of a declared struct.
7094     const MemberExpr *EncounteredME = nullptr;
7095 
7096     // Track the total number of dimensions. Start from one for the dummy
7097     // dimension.
7098     uint64_t DimSize = 1;
7099 
7100     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7101     bool IsPrevMemberReference = false;
7102 
7103     // We need to check if we will be encountering any member expressions
7104     // (MEs). If we do not encounter any, we will be mapping the whole struct.
7105     // In that case we need to skip adding an entry for the struct to the
7106     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7107     // list only when generating all info for clauses.
7108     bool IsMappingWholeStruct = true;
7109     if (!GenerateAllInfoForClauses) {
7110       IsMappingWholeStruct = false;
7111     } else {
7112       for (auto TempI = I; TempI != CE; ++TempI) {
7113         const MemberExpr *PossibleME =
7114             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7115         if (PossibleME) {
7116           IsMappingWholeStruct = false;
7117           break;
7118         }
7119       }
7120     }
7121 
7122     for (; I != CE; ++I) {
7123       // If the current component is member of a struct (parent struct) mark it.
7124       if (!EncounteredME) {
7125         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7126         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7127         // as MEMBER_OF the parent struct.
7128         if (EncounteredME) {
7129           ShouldBeMemberOf = true;
7130           // Do not emit as a complex pointer if this is actually not an
7131           // array-like expression.
7132           if (FirstPointerInComplexData) {
7133             QualType Ty = std::prev(I)
7134                               ->getAssociatedDeclaration()
7135                               ->getType()
7136                               .getNonReferenceType();
7137             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7138             FirstPointerInComplexData = false;
7139           }
7140         }
7141       }
7142 
7143       auto Next = std::next(I);
7144 
7145       // We need to generate the addresses and sizes if this is the last
7146       // component, if the component is a pointer or if it is an array section
7147       // whose length can't be proved to be one. If this is a pointer, it
7148       // becomes the base address for the following components.
7149 
7150       // A final array section is one whose length can't be proved to be one.
7151       // If the map item is non-contiguous then we don't treat any array section
7152       // as final array section.
7153       bool IsFinalArraySection =
7154           !IsNonContiguous &&
7155           isFinalArraySectionExpression(I->getAssociatedExpression());
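      // E.g. (a sketch, not from the source): for "A[3:1]" the length is
      // provably one, so it is not a final array section; for "A[3:N]" with a
      // runtime value N it is, and it ends processing of this component list.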
7156 
7157       // If we have a declaration for the mapping, use that; otherwise use
7158       // the base declaration of the map clause.
7159       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7160                                      ? I->getAssociatedDeclaration()
7161                                      : BaseDecl;
7162       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7163                                                : MapExpr;
7164 
7165       // Get information on whether the element is a pointer. We have to treat
7166       // array sections specially given that they are built-in
7167       // types.
7168       const auto *OASE =
7169           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7170       const auto *OAShE =
7171           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7172       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7173       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7174       bool IsPointer =
7175           OAShE ||
7176           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7177                        .getCanonicalType()
7178                        ->isAnyPointerType()) ||
7179           I->getAssociatedExpression()->getType()->isAnyPointerType();
7180       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7181                                MapDecl &&
7182                                MapDecl->getType()->isLValueReferenceType();
7183       bool IsNonDerefPointer = IsPointer &&
7184                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7185                                !IsNonContiguous;
7186 
7187       if (OASE)
7188         ++DimSize;
7189 
7190       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7191           IsFinalArraySection) {
7192         // If this is not the last component, we expect the pointer to be
7193         // associated with an array expression or member expression.
7194         assert((Next == CE ||
7195                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7196                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7197                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7198                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7199                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7200                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7201                "Unexpected expression");
7202 
7203         Address LB = Address::invalid();
7204         Address LowestElem = Address::invalid();
7205         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7206                                        const MemberExpr *E) {
7207           const Expr *BaseExpr = E->getBase();
7208           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7209           // scalar.
7210           LValue BaseLV;
7211           if (E->isArrow()) {
7212             LValueBaseInfo BaseInfo;
7213             TBAAAccessInfo TBAAInfo;
7214             Address Addr =
7215                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7216             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7217             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7218           } else {
7219             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7220           }
7221           return BaseLV;
7222         };
7223         if (OAShE) {
7224           LowestElem = LB =
7225               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7226                       CGF.ConvertTypeForMem(
7227                           OAShE->getBase()->getType()->getPointeeType()),
7228                       CGF.getContext().getTypeAlignInChars(
7229                           OAShE->getBase()->getType()));
7230         } else if (IsMemberReference) {
7231           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7232           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7233           LowestElem = CGF.EmitLValueForFieldInitialization(
7234                               BaseLVal, cast<FieldDecl>(MapDecl))
7235                            .getAddress(CGF);
7236           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7237                    .getAddress(CGF);
7238         } else {
7239           LowestElem = LB =
7240               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7241                   .getAddress(CGF);
7242         }
7243 
7244         // If this component is a pointer inside the base struct then we don't
7245         // need to create any entry for it - it will be combined with the object
7246         // it is pointing to into a single PTR_AND_OBJ entry.
7247         bool IsMemberPointerOrAddr =
7248             EncounteredME &&
7249             (((IsPointer || ForDeviceAddr) &&
7250               I->getAssociatedExpression() == EncounteredME) ||
7251              (IsPrevMemberReference && !IsPointer) ||
7252              (IsMemberReference && Next != CE &&
7253               !Next->getAssociatedExpression()->getType()->isPointerType()));
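        // Hedged example: in map(tofrom: S1.P[0:N]) the component for "S1.P"
        // itself gets no standalone entry; it becomes the pointer half of the
        // PTR_AND_OBJ entry emitted for the section it points to.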
7254         if (!OverlappedElements.empty() && Next == CE) {
7255           // Handle base element with the info for overlapped elements.
7256           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7257           assert(!IsPointer &&
7258                  "Unexpected base element with the pointer type.");
7259           // Mark the whole struct as the struct that requires allocation on the
7260           // device.
7261           PartialStruct.LowestElem = {0, LowestElem};
7262           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7263               I->getAssociatedExpression()->getType());
7264           Address HB = CGF.Builder.CreateConstGEP(
7265               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7266                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7267               TypeSize.getQuantity() - 1);
7268           PartialStruct.HighestElem = {
7269               std::numeric_limits<decltype(
7270                   PartialStruct.HighestElem.first)>::max(),
7271               HB};
7272           PartialStruct.Base = BP;
7273           PartialStruct.LB = LB;
7274           assert(
7275               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7276               "Overlapped elements must be used only once for the variable.");
7277           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7278           // Emit data for non-overlapped data.
7279           OpenMPOffloadMappingFlags Flags =
7280               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7281               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7282                              /*AddPtrFlag=*/false,
7283                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7284           llvm::Value *Size = nullptr;
7285           // Do bitcopy of all non-overlapped structure elements.
7286           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7287                    Component : OverlappedElements) {
7288             Address ComponentLB = Address::invalid();
7289             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7290                  Component) {
7291               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7292                 const auto *FD = dyn_cast<FieldDecl>(VD);
7293                 if (FD && FD->getType()->isLValueReferenceType()) {
7294                   const auto *ME =
7295                       cast<MemberExpr>(MC.getAssociatedExpression());
7296                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7297                   ComponentLB =
7298                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7299                           .getAddress(CGF);
7300                 } else {
7301                   ComponentLB =
7302                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7303                           .getAddress(CGF);
7304                 }
7305                 Size = CGF.Builder.CreatePtrDiff(
7306                     CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7307                 break;
7308               }
7309             }
7310             assert(Size && "Failed to determine structure size");
7311             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7312             CombinedInfo.BasePointers.push_back(BP.getPointer());
7313             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7314             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7315             CombinedInfo.Pointers.push_back(LB.getPointer());
7316             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7317                 Size, CGF.Int64Ty, /*isSigned=*/true));
7318             CombinedInfo.Types.push_back(Flags);
7319             CombinedInfo.Mappers.push_back(nullptr);
7320             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7321                                                                       : 1);
7322             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7323           }
7324           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7325           CombinedInfo.BasePointers.push_back(BP.getPointer());
7326           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7327           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7328           CombinedInfo.Pointers.push_back(LB.getPointer());
7329           Size = CGF.Builder.CreatePtrDiff(
7330               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7331               LB.getPointer());
7332           CombinedInfo.Sizes.push_back(
7333               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7334           CombinedInfo.Types.push_back(Flags);
7335           CombinedInfo.Mappers.push_back(nullptr);
7336           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7337                                                                     : 1);
7338           break;
7339         }
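        // Hedged sketch of the overlap handling above: when the whole struct
        // S1 is mapped together with a separate map of one of its members,
        // the loop emits per-piece entries that bit-copy only the regions of
        // S1 around the overlapped member instead of one blanket copy.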
7340         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7341         // Skip adding an entry to CurInfo for this combined entry if the
7342         // whole struct is currently being mapped. The struct needs to be added
7343         // in the first position, before any data internal to the struct is
7344         // mapped.
7345         if (!IsMemberPointerOrAddr ||
7346             (Next == CE && MapType != OMPC_MAP_unknown)) {
7347           if (!IsMappingWholeStruct) {
7348             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7349             CombinedInfo.BasePointers.push_back(BP.getPointer());
7350             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7351             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7352             CombinedInfo.Pointers.push_back(LB.getPointer());
7353             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7354                 Size, CGF.Int64Ty, /*isSigned=*/true));
7355             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7356                                                                       : 1);
7357           } else {
7358             StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7359             StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
7360             StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7361             StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7362             StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
7363             StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7364                 Size, CGF.Int64Ty, /*isSigned=*/true));
7365             StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7366                 IsNonContiguous ? DimSize : 1);
7367           }
7368 
7369           // If Mapper is valid, the last component inherits the mapper.
7370           bool HasMapper = Mapper && Next == CE;
7371           if (!IsMappingWholeStruct)
7372             CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7373           else
7374             StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7375                                                                : nullptr);
7376 
7377           // We need to add a pointer flag for each map that comes from the
7378           // same expression except for the first one. We also need to signal
7379           // this map is the first one that relates to the current capture
7380           // (there is a set of entries for each capture).
7381           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7382               MapType, MapModifiers, MotionModifiers, IsImplicit,
7383               !IsExpressionFirstInfo || RequiresReference ||
7384                   FirstPointerInComplexData || IsMemberReference,
7385               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7386 
7387           if (!IsExpressionFirstInfo || IsMemberReference) {
7388             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7389             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7390             if (IsPointer || (IsMemberReference && Next != CE))
7391               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7392                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7393                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7394                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7395                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7396 
7397             if (ShouldBeMemberOf) {
7398               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7399               // should be later updated with the correct value of MEMBER_OF.
7400               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7401               // From now on, all subsequent PTR_AND_OBJ entries should not be
7402               // marked as MEMBER_OF.
7403               ShouldBeMemberOf = false;
7404             }
7405           }
7406 
7407           if (!IsMappingWholeStruct)
7408             CombinedInfo.Types.push_back(Flags);
7409           else
7410             StructBaseCombinedInfo.Types.push_back(Flags);
7411         }
7412 
7413         // If we have encountered a member expression so far, keep track of the
7414         // mapped member. If the parent is "*this", then the value declaration
7415         // is nullptr.
7416         if (EncounteredME) {
7417           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7418           unsigned FieldIndex = FD->getFieldIndex();
7419 
7420           // Update info about the lowest and highest elements for this struct.
7421           if (!PartialStruct.Base.isValid()) {
7422             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7423             if (IsFinalArraySection) {
7424               Address HB =
7425                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7426                       .getAddress(CGF);
7427               PartialStruct.HighestElem = {FieldIndex, HB};
7428             } else {
7429               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7430             }
7431             PartialStruct.Base = BP;
7432             PartialStruct.LB = BP;
7433           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7434             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7435           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7436             if (IsFinalArraySection) {
7437               Address HB =
7438                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7439                       .getAddress(CGF);
7440               PartialStruct.HighestElem = {FieldIndex, HB};
7441             } else {
7442               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7443             }
7444           }
7445         }
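        // E.g. (sketch): mapping S1.C and then S1.A (field indices 2 and 0)
        // widens the tracked range so the later combined entry spans from
        // &S1.A to one past &S1.C.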
7446 
7447         // We need to emit a combined struct entry for array sections.
7448         if (IsFinalArraySection || IsNonContiguous)
7449           PartialStruct.IsArraySection = true;
7450 
7451         // If we have a final array section, we are done with this expression.
7452         if (IsFinalArraySection)
7453           break;
7454 
7455         // The pointer becomes the base for the next element.
7456         if (Next != CE)
7457           BP = IsMemberReference ? LowestElem : LB;
7458 
7459         IsExpressionFirstInfo = false;
7460         IsCaptureFirstInfo = false;
7461         FirstPointerInComplexData = false;
7462         IsPrevMemberReference = IsMemberReference;
7463       } else if (FirstPointerInComplexData) {
7464         QualType Ty = Components.rbegin()
7465                           ->getAssociatedDeclaration()
7466                           ->getType()
7467                           .getNonReferenceType();
7468         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7469         FirstPointerInComplexData = false;
7470       }
7471     }
7472     // If we ran into the whole component, allocate space for the whole
7473     // record.
7474     if (!EncounteredME)
7475       PartialStruct.HasCompleteRecord = true;
7476 
7477     if (!IsNonContiguous)
7478       return;
7479 
7480     const ASTContext &Context = CGF.getContext();
7481 
7482     // To support strides in array sections, we need to initialize the first
7483     // dimension size as 1, the first offset as 0, and the first count as 1.
7484     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7485     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7486     MapValuesArrayTy CurStrides;
7487     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7488     uint64_t ElementTypeSize;
7489 
7490     // Collect Size information for each dimension and get the element size as
7491     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7492     // should be [10, 10] and the first stride is 4 bytes.
7493     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7494          Components) {
7495       const Expr *AssocExpr = Component.getAssociatedExpression();
7496       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7497 
7498       if (!OASE)
7499         continue;
7500 
7501       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7502       auto *CAT = Context.getAsConstantArrayType(Ty);
7503       auto *VAT = Context.getAsVariableArrayType(Ty);
7504 
7505       // We need all the dimension sizes except for the last one.
7506       assert((VAT || CAT || &Component == &*Components.begin()) &&
7507              "Should be either ConstantArray or VariableArray if not the "
7508              "first Component");
7509 
7510       // Get element size if CurStrides is empty.
7511       if (CurStrides.empty()) {
7512         const Type *ElementType = nullptr;
7513         if (CAT)
7514           ElementType = CAT->getElementType().getTypePtr();
7515         else if (VAT)
7516           ElementType = VAT->getElementType().getTypePtr();
7517         else
7518           assert(&Component == &*Components.begin() &&
7519                  "Only expect pointer (non CAT or VAT) when this is the "
7520                  "first Component");
7521         // If ElementType is null, then it means the base is a pointer
7522         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7523         // in the next iteration.
7524         if (ElementType) {
7525           // When the base is a pointer, we need to remove one
7526           // level of indirection.
7527           if (&Component != &*Components.begin())
7528             ElementType = ElementType->getPointeeOrArrayElementType();
7529           ElementTypeSize =
7530               Context.getTypeSizeInChars(ElementType).getQuantity();
7531           CurStrides.push_back(
7532               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7533         }
7534       }
7535       // Get the dimension value, except for the last dimension since we don't
7536       // need it.
7537       if (DimSizes.size() < Components.size() - 1) {
7538         if (CAT)
7539           DimSizes.push_back(llvm::ConstantInt::get(
7540               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7541         else if (VAT)
7542           DimSizes.push_back(CGF.Builder.CreateIntCast(
7543               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7544               /*IsSigned=*/false));
7545       }
7546     }
7547 
7548     // Skip the dummy dimension since we already have its information.
7549     auto *DI = DimSizes.begin() + 1;
7550     // Running product of the dimension sizes.
7551     llvm::Value *DimProd =
7552         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7553 
7554     // Collect info for the non-contiguous case. Note that offset, count, and
7555     // stride are only meaningful for array sections, so we insert a null for
7556     // anything other than an array section.
7557     // Also, the number of offsets, counts, and strides is not the same as the
7558     // number of pointers, base_pointers, sizes, or dims; instead, it matches
7559     // the number of non-contiguous declarations in the target update to/from
7560     // clause.
7561     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7562          Components) {
7563       const Expr *AssocExpr = Component.getAssociatedExpression();
7564 
7565       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7566         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7567             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7568             /*isSigned=*/false);
7569         CurOffsets.push_back(Offset);
7570         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7571         CurStrides.push_back(CurStrides.back());
7572         continue;
7573       }
7574 
7575       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7576 
7577       if (!OASE)
7578         continue;
7579 
7580       // Offset
7581       const Expr *OffsetExpr = OASE->getLowerBound();
7582       llvm::Value *Offset = nullptr;
7583       if (!OffsetExpr) {
7584         // If offset is absent, then we just set it to zero.
7585         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7586       } else {
7587         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7588                                            CGF.Int64Ty,
7589                                            /*isSigned=*/false);
7590       }
7591       CurOffsets.push_back(Offset);
7592 
7593       // Count
7594       const Expr *CountExpr = OASE->getLength();
7595       llvm::Value *Count = nullptr;
7596       if (!CountExpr) {
7597         // In Clang, once a higher dimension is an array section, we construct
7598         // all the lower dimensions as array sections; however, for a case like
7599         // arr[0:2][2], Clang constructs the inner dimension as an array section
7600         // even though it is not in array-section form according to the spec.
7601         if (!OASE->getColonLocFirst().isValid() &&
7602             !OASE->getColonLocSecond().isValid()) {
7603           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7604         } else {
7605           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7606           // When the length is absent it defaults to ⌈(size −
7607           // lower-bound)/stride⌉, where size is the size of the array
7608           // dimension.
7609           const Expr *StrideExpr = OASE->getStride();
7610           llvm::Value *Stride =
7611               StrideExpr
7612                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7613                                               CGF.Int64Ty, /*isSigned=*/false)
7614                   : nullptr;
7615           if (Stride)
7616             Count = CGF.Builder.CreateUDiv(
7617                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7618           else
7619             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7620         }
7621       } else {
7622         Count = CGF.EmitScalarExpr(CountExpr);
7623       }
7624       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7625       CurCounts.push_back(Count);
7626 
7627       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7628       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7629       //              Offset      Count     Stride
7630       //    D0          0           1         4    (int)    <- dummy dimension
7631       //    D1          0           2         8    (2 * (1) * 4)
7632       //    D2          1           2         20   (1 * (1 * 5) * 4)
7633       //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
7634       const Expr *StrideExpr = OASE->getStride();
7635       llvm::Value *Stride =
7636           StrideExpr
7637               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7638                                           CGF.Int64Ty, /*isSigned=*/false)
7639               : nullptr;
7640       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7641       if (Stride)
7642         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7643       else
7644         CurStrides.push_back(DimProd);
7645       if (DI != DimSizes.end())
7646         ++DI;
7647     }
7648 
7649     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7650     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7651     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7652   }
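  // Hedged end-to-end example of the non-contiguous path above: for
  //   double A[100];
  //   #pragma omp target update to(A[0:50:2])
  // a single offsets/counts/strides triple is recorded here, roughly
  // {0, 0} / {1, 50} / {8, 16} including the dummy dimension (8 = element
  // size in bytes, 16 = stride of two doubles).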
7653 
7654   /// Return the adjusted map modifiers if the declaration a capture refers to
7655   /// appears in a firstprivate clause. This is expected to be used only with
7656   /// directives that start with 'target'.
7657   OpenMPOffloadMappingFlags
7658   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7659     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7660 
7661     // A firstprivate variable captured by reference will use only the
7662     // 'private ptr' and 'map to' flags. Return the right flags if the captured
7663     // declaration is known to be firstprivate in this handler.
7664     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7665       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7666         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7667                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7668       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7669              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7670     }
7671     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7672     if (I != LambdasMap.end())
7673       // For map(to: lambda), use the user-specified map type.
7674       return getMapTypeBits(
7675           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7676           /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7677           /*AddPtrFlag=*/false,
7678           /*AddIsTargetParamFlag=*/false,
7679           /*isNonContiguous=*/false);
7680     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7681            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7682   }
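  // A minimal sketch of the rules above (assumption, not from the source):
  //   int *P; int X;
  //   #pragma omp target firstprivate(P) firstprivate(X)
  // the capture of P is mapped TO | PTR_AND_OBJ, the non-pointer X gets
  // PRIVATE | TO, and anything not known here falls back to TO | FROM.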
7683 
7684   void getPlainLayout(const CXXRecordDecl *RD,
7685                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7686                       bool AsBase) const {
7687     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7688 
7689     llvm::StructType *St =
7690         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7691 
7692     unsigned NumElements = St->getNumElements();
7693     llvm::SmallVector<
7694         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7695         RecordLayout(NumElements);
7696 
7697     // Fill bases.
7698     for (const auto &I : RD->bases()) {
7699       if (I.isVirtual())
7700         continue;
7701       const auto *Base = I.getType()->getAsCXXRecordDecl();
7702       // Ignore empty bases.
7703       if (Base->isEmpty() || CGF.getContext()
7704                                  .getASTRecordLayout(Base)
7705                                  .getNonVirtualSize()
7706                                  .isZero())
7707         continue;
7708 
7709       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7710       RecordLayout[FieldIndex] = Base;
7711     }
7712     // Fill in virtual bases.
7713     for (const auto &I : RD->vbases()) {
7714       const auto *Base = I.getType()->getAsCXXRecordDecl();
7715       // Ignore empty bases.
7716       if (Base->isEmpty())
7717         continue;
7718       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7719       if (RecordLayout[FieldIndex])
7720         continue;
7721       RecordLayout[FieldIndex] = Base;
7722     }
7723     // Fill in all the fields.
7724     assert(!RD->isUnion() && "Unexpected union.");
7725     for (const auto *Field : RD->fields()) {
7726       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7727       // will fill in later.)
7728       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7729         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7730         RecordLayout[FieldIndex] = Field;
7731       }
7732     }
7733     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7734              &Data : RecordLayout) {
7735       if (Data.isNull())
7736         continue;
7737       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7738         getPlainLayout(Base, Layout, /*AsBase=*/true);
7739       else
7740         Layout.push_back(Data.get<const FieldDecl *>());
7741     }
7742   }
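  // Hedged illustration: for
  //   struct B { int X; };  struct D : B { int Y; };
  // getPlainLayout on D yields [B::X, D::Y]; bases are flattened recursively
  // in LLVM field order, while empty bases, bit-fields, and zero-size fields
  // are skipped.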
7743 
7744   /// Generate all the base pointers, section pointers, sizes, map types, and
7745   /// mappers for the extracted mappable expressions (all included in \a
7746   /// CombinedInfo). Also, for each item that relates with a device pointer, a
7747   /// pair of the relevant declaration and index where it occurs is appended to
7748   /// the device pointers info array.
7749   void generateAllInfoForClauses(
7750       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7751       llvm::OpenMPIRBuilder &OMPBuilder,
7752       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7753           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7754     // We have to process the component lists that relate to the same
7755     // declaration in a single chunk so that we can generate the map flags
7756     // correctly. Therefore, we organize all lists in a map.
7757     enum MapKind { Present, Allocs, Other, Total };
7758     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7759                     SmallVector<SmallVector<MapInfo, 8>, 4>>
7760         Info;
7761 
7762     // Helper function to fill the information map for the different supported
7763     // clauses.
7764     auto &&InfoGen =
7765         [&Info, &SkipVarSet](
7766             const ValueDecl *D, MapKind Kind,
7767             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7768             OpenMPMapClauseKind MapType,
7769             ArrayRef<OpenMPMapModifierKind> MapModifiers,
7770             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7771             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7772             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7773           if (SkipVarSet.contains(D))
7774             return;
7775           auto It = Info.find(D);
7776           if (It == Info.end())
7777             It = Info
7778                      .insert(std::make_pair(
7779                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7780                      .first;
7781           It->second[Kind].emplace_back(
7782               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7783               IsImplicit, Mapper, VarRef, ForDeviceAddr);
7784         };
7785 
7786     for (const auto *Cl : Clauses) {
7787       const auto *C = dyn_cast<OMPMapClause>(Cl);
7788       if (!C)
7789         continue;
7790       MapKind Kind = Other;
7791       if (llvm::is_contained(C->getMapTypeModifiers(),
7792                              OMPC_MAP_MODIFIER_present))
7793         Kind = Present;
7794       else if (C->getMapType() == OMPC_MAP_alloc)
7795         Kind = Allocs;
7796       const auto *EI = C->getVarRefs().begin();
7797       for (const auto L : C->component_lists()) {
7798         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7799         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7800                 C->getMapTypeModifiers(), std::nullopt,
7801                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7802                 E);
7803         ++EI;
7804       }
7805     }
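    // E.g. (sketch): "map(present, to: X)" is bucketed as Present,
    // "map(alloc: Y)" as Allocs, and plain "map(to: Z)" as Other, so entries
    // for the same declaration are emitted in a fixed Present/Allocs/Other
    // order.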
7806     for (const auto *Cl : Clauses) {
7807       const auto *C = dyn_cast<OMPToClause>(Cl);
7808       if (!C)
7809         continue;
7810       MapKind Kind = Other;
7811       if (llvm::is_contained(C->getMotionModifiers(),
7812                              OMPC_MOTION_MODIFIER_present))
7813         Kind = Present;
7814       const auto *EI = C->getVarRefs().begin();
7815       for (const auto L : C->component_lists()) {
7816         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7817                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7818                 C->isImplicit(), std::get<2>(L), *EI);
7819         ++EI;
7820       }
7821     }
7822     for (const auto *Cl : Clauses) {
7823       const auto *C = dyn_cast<OMPFromClause>(Cl);
7824       if (!C)
7825         continue;
7826       MapKind Kind = Other;
7827       if (llvm::is_contained(C->getMotionModifiers(),
7828                              OMPC_MOTION_MODIFIER_present))
7829         Kind = Present;
7830       const auto *EI = C->getVarRefs().begin();
7831       for (const auto L : C->component_lists()) {
7832         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7833                 std::nullopt, C->getMotionModifiers(),
7834                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7835                 *EI);
7836         ++EI;
7837       }
7838     }
7839 
7840     // Look at the use_device_ptr and use_device_addr clause information and
7841     // mark the existing map entries as such. If there is no map information for
7842     // an entry in the use_device_ptr or use_device_addr list, we create one
7843     // with map type 'alloc' and a zero-size section. It is the user's fault if
7844     // that was not mapped before. If there is no map information and the
7845     // pointer is a struct member, then we defer the emission of that entry
7846     // until the whole struct has been processed.
7847     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7848                     SmallVector<DeferredDevicePtrEntryTy, 4>>
7849         DeferredInfo;
7850     MapCombinedInfoTy UseDeviceDataCombinedInfo;
7851 
7852     auto &&UseDeviceDataCombinedInfoGen =
7853         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7854                                      CodeGenFunction &CGF, bool IsDevAddr) {
7855           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7856           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7857           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7858           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7859               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7860           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7861           UseDeviceDataCombinedInfo.Sizes.push_back(
7862               llvm::Constant::getNullValue(CGF.Int64Ty));
7863           UseDeviceDataCombinedInfo.Types.push_back(
7864               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7865           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7866         };
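    // Sketch (assumption): for "use_device_ptr(P)" this helper records P with
    // a zero size and the RETURN_PARAM flag, so the runtime returns the
    // matching device pointer rather than mapping any storage.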
7867 
7868     auto &&MapInfoGen =
7869         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7870          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7871                    OMPClauseMappableExprCommon::MappableExprComponentListRef
7872                        Components,
7873                    bool IsImplicit, bool IsDevAddr) {
7874           // We didn't find any match in our map information, so generate a
7875           // zero-size array section; if the pointer is a struct member, we defer
7876           // this action until the whole struct has been processed.
7877           if (isa<MemberExpr>(IE)) {
7878             // Insert the pointer into Info to be processed by
7879             // generateInfoForComponentList. Because it is a member pointer
7880             // without a pointee, no entry will be generated for it, therefore
7881             // we need to generate one after the whole struct has been
7882             // processed. Nonetheless, generateInfoForComponentList must be
7883             // called to take the pointer into account for the calculation of
7884             // the range of the partial struct.
7885             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7886                     std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7887                     nullptr, nullptr, IsDevAddr);
7888             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7889           } else {
7890             llvm::Value *Ptr;
7891             if (IsDevAddr) {
7892               if (IE->isGLValue())
7893                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7894               else
7895                 Ptr = CGF.EmitScalarExpr(IE);
7896             } else {
7897               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7898             }
7899             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7900           }
7901         };
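    // Hedged example: if the use_device_ptr/use_device_addr operand is a
    // member expression like S1.P with no matching map entry yet, its
    // RETURN_PARAM entry is deferred via DeferredInfo until the enclosing
    // struct has been processed; a plain pointer operand with no prior map
    // goes straight to UseDeviceDataCombinedInfoGen.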
7902 
7903     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7904                                     const Expr *IE, bool IsDevAddr) -> bool {
7905       // We potentially have map information for this declaration already.
7906       // Look for the first set of components that refer to it. If found,
7907       // return true.
7908       // If the first component is a member expression, we have to look into
7909       // 'this', which maps to null in the map of map information. Otherwise
7910       // look directly for the information.
7911       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7912       if (It != Info.end()) {
7913         bool Found = false;
7914         for (auto &Data : It->second) {
7915           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7916             return MI.Components.back().getAssociatedDeclaration() == VD;
7917           });
7918           // If we found a map entry, signal that the pointer has to be
7919           // returned and move on to the next declaration. Exclude cases where
7920           // the base pointer is mapped as array subscript, array section or
7921           // array shaping. The base address is passed as a pointer to base in
7922           // this case and cannot be used as a base for use_device_ptr list
7923           // item.
7924           if (CI != Data.end()) {
7925             if (IsDevAddr) {
7926               CI->ForDeviceAddr = IsDevAddr;
7927               CI->ReturnDevicePointer = true;
7928               Found = true;
7929               break;
7930             } else {
7931               auto PrevCI = std::next(CI->Components.rbegin());
7932               const auto *VarD = dyn_cast<VarDecl>(VD);
7933               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7934                   isa<MemberExpr>(IE) ||
7935                   !VD->getType().getNonReferenceType()->isPointerType() ||
7936                   PrevCI == CI->Components.rend() ||
7937                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7938                   VarD->hasLocalStorage()) {
7939                 CI->ForDeviceAddr = IsDevAddr;
7940                 CI->ReturnDevicePointer = true;
7941                 Found = true;
7942                 break;
7943               }
7944             }
7945           }
7946         }
7947         return Found;
7948       }
7949       return false;
7950     };
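    // Sketch of the rules above (assumption): under
    //   #pragma omp requires unified_shared_memory
    // or when the list item is a local pointer, an existing map entry is
    // simply marked ReturnDevicePointer and reused; a base pointer that was
    // itself mapped via an array subscript or section cannot serve as the
    // use_device_ptr base, so no entry is marked for it.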
7951 
7952     // Look at the use_device_ptr clause information and mark the existing map
7953     // entries as such. If there is no map information for an entry in the
7954     // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7955     // section. It is the user's fault if that was not mapped before. If there is
7956     // no map information and the pointer is a struct member, then we defer the
7957     // emission of that entry until the whole struct has been processed.
7958     for (const auto *Cl : Clauses) {
7959       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7960       if (!C)
7961         continue;
7962       for (const auto L : C->component_lists()) {
7963         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7964             std::get<1>(L);
7965         assert(!Components.empty() &&
7966                "Not expecting empty list of components!");
7967         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7968         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7969         const Expr *IE = Components.back().getAssociatedExpression();
7970         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
7971           continue;
7972         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
7973                    /*IsDevAddr=*/false);
7974       }
7975     }
7976 
7977     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7978     for (const auto *Cl : Clauses) {
7979       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
7980       if (!C)
7981         continue;
7982       for (const auto L : C->component_lists()) {
7983         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7984             std::get<1>(L);
7985         assert(!std::get<1>(L).empty() &&
7986                "Not expecting empty list of components!");
7987         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
7988         if (!Processed.insert(VD).second)
7989           continue;
7990         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7991         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
7992         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
7993           continue;
7994         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
7995                    /*IsDevAddr=*/true);
7996       }
7997     }
7998 
7999     for (const auto &Data : Info) {
8000       StructRangeInfoTy PartialStruct;
8001       // Current struct information:
8002       MapCombinedInfoTy CurInfo;
8003       // Current struct base information:
8004       MapCombinedInfoTy StructBaseCurInfo;
8005       const Decl *D = Data.first;
8006       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8007       for (const auto &M : Data.second) {
8008         for (const MapInfo &L : M) {
8009           assert(!L.Components.empty() &&
8010                  "Not expecting declaration with no component lists.");
8011 
8012           // Remember the current base pointer index.
8013           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8014           unsigned StructBasePointersIdx =
8015               StructBaseCurInfo.BasePointers.size();
8016           CurInfo.NonContigInfo.IsNonContiguous =
8017               L.Components.back().isNonContiguous();
8018           generateInfoForComponentList(
8019               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8020               CurInfo, StructBaseCurInfo, PartialStruct,
8021               /*IsFirstComponentList=*/false, L.IsImplicit,
8022               /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8023               L.VarRef);
8024 
8025           // If this entry relates to a device pointer, set the relevant
8026           // declaration and add the 'return pointer' flag.
8027           if (L.ReturnDevicePointer) {
8028             // Check whether a value was added to either CurInfo or
8029             // StructBaseCurInfo and error if no value was added to either of
8030             // them:
8031             assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8032                     StructBasePointersIdx <
8033                         StructBaseCurInfo.BasePointers.size()) &&
8034                    "Unexpected number of mapped base pointers.");
8035 
8036             // Choose a base pointer index which is always valid:
8037             const ValueDecl *RelevantVD =
8038                 L.Components.back().getAssociatedDeclaration();
8039             assert(RelevantVD &&
8040                    "No relevant declaration related with device pointer??");
8041 
8042             // If StructBaseCurInfo has been updated this iteration, then work on
8043             // the first new entry added to it, i.e., make sure that when multiple
8044             // values are added to any of the lists, the first value added is the
8045             // one modified by the assignments below (not the last value
8046             // added).
8047             if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8048               StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8049                   RelevantVD;
8050               StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8051                   L.ForDeviceAddr ? DeviceInfoTy::Address
8052                                   : DeviceInfoTy::Pointer;
8053               StructBaseCurInfo.Types[StructBasePointersIdx] |=
8054                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8055             } else {
8056               CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8057               CurInfo.DevicePointers[CurrentBasePointersIdx] =
8058                   L.ForDeviceAddr ? DeviceInfoTy::Address
8059                                   : DeviceInfoTy::Pointer;
8060               CurInfo.Types[CurrentBasePointersIdx] |=
8061                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8062             }
8063           }
8064         }
8065       }
8066 
8067       // Append any pending zero-length pointers which are struct members and
8068       // used with use_device_ptr or use_device_addr.
8069       auto CI = DeferredInfo.find(Data.first);
8070       if (CI != DeferredInfo.end()) {
8071         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8072           llvm::Value *BasePtr;
8073           llvm::Value *Ptr;
8074           if (L.ForDeviceAddr) {
8075             if (L.IE->isGLValue())
8076               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8077             else
8078               Ptr = this->CGF.EmitScalarExpr(L.IE);
8079             BasePtr = Ptr;
8080             // Entry is RETURN_PARAM. Also, set the placeholder value
8081             // MEMBER_OF=FFFF so that the entry is later updated with the
8082             // correct value of MEMBER_OF.
8083             CurInfo.Types.push_back(
8084                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8085                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8086           } else {
8087             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8088             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8089                                              L.IE->getExprLoc());
8090             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8091             // placeholder value MEMBER_OF=FFFF so that the entry is later
8092             // updated with the correct value of MEMBER_OF.
8093             CurInfo.Types.push_back(
8094                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8095                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8096                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8097           }
8098           CurInfo.Exprs.push_back(L.VD);
8099           CurInfo.BasePointers.emplace_back(BasePtr);
8100           CurInfo.DevicePtrDecls.emplace_back(L.VD);
8101           CurInfo.DevicePointers.emplace_back(
8102               L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8103           CurInfo.Pointers.push_back(Ptr);
8104           CurInfo.Sizes.push_back(
8105               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8106           CurInfo.Mappers.push_back(nullptr);
8107         }
8108       }
8109 
8110       // Unify entries in one list making sure the struct mapping precedes the
8111       // individual fields:
8112       MapCombinedInfoTy UnionCurInfo;
8113       UnionCurInfo.append(StructBaseCurInfo);
8114       UnionCurInfo.append(CurInfo);
8115 
8116       // If there is an entry in PartialStruct it means we have a struct with
8117       // individual members mapped. Emit an extra combined entry.
8118       if (PartialStruct.Base.isValid()) {
8119         UnionCurInfo.NonContigInfo.Dims.push_back(0);
8120         // Emit a combined entry:
8121         emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8122                           /*IsMapThis*/ !VD, OMPBuilder, VD);
8123       }
8124 
8125       // We need to append the results of this capture to what we already have.
8126       CombinedInfo.append(UnionCurInfo);
8127     }
8128     // Append data for use_device_ptr clauses.
8129     CombinedInfo.append(UseDeviceDataCombinedInfo);
8130   }
8131 
8132 public:
8133   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8134       : CurDir(&Dir), CGF(CGF) {
8135     // Extract firstprivate clause information.
8136     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8137       for (const auto *D : C->varlists())
8138         FirstPrivateDecls.try_emplace(
8139             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8140     // Extract implicit firstprivates from uses_allocators clauses.
8141     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8142       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8143         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8144         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8145           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8146                                         /*Implicit=*/true);
8147         else if (const auto *VD = dyn_cast<VarDecl>(
8148                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8149                          ->getDecl()))
8150           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8151       }
8152     }
8153     // Extract device pointer clause information.
8154     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8155       for (auto L : C->component_lists())
8156         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8157     // Extract device addr clause information.
8158     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8159       for (auto L : C->component_lists())
8160         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8161     // Extract map information.
8162     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8163       if (C->getMapType() != OMPC_MAP_to)
8164         continue;
8165       for (auto L : C->component_lists()) {
8166         const ValueDecl *VD = std::get<0>(L);
8167         const auto *RD = VD ? VD->getType()
8168                                   .getCanonicalType()
8169                                   .getNonReferenceType()
8170                                   ->getAsCXXRecordDecl()
8171                             : nullptr;
8172         if (RD && RD->isLambda())
8173           LambdasMap.try_emplace(std::get<0>(L), C);
8174       }
8175     }
8176   }
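  // Hedged usage note: given "#pragma omp target map(to: L)" where L is a
  // lambda, the constructor above records the clause in LambdasMap so that
  // getMapModifiersForPrivateClauses can later reuse the user-specified map
  // type for the lambda.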
8177 
8178   /// Constructor for the declare mapper directive.
8179   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8180       : CurDir(&Dir), CGF(CGF) {}
8181 
8182   /// Generate code for the combined entry if we have a partially mapped struct
8183   /// and take care of the mapping flags of the arguments corresponding to
8184   /// individual struct members.
8185   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8186                          MapFlagsArrayTy &CurTypes,
8187                          const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8188                          llvm::OpenMPIRBuilder &OMPBuilder,
8189                          const ValueDecl *VD = nullptr,
8190                          bool NotTargetParams = true) const {
8191     if (CurTypes.size() == 1 &&
8192         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8193          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8194         !PartialStruct.IsArraySection)
8195       return;
8196     Address LBAddr = PartialStruct.LowestElem.second;
8197     Address HBAddr = PartialStruct.HighestElem.second;
8198     if (PartialStruct.HasCompleteRecord) {
8199       LBAddr = PartialStruct.LB;
8200       HBAddr = PartialStruct.LB;
8201     }
8202     CombinedInfo.Exprs.push_back(VD);
8203     // Base is the base of the struct
8204     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8205     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8206     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8207     // Pointer is the address of the lowest element
8208     llvm::Value *LB = LBAddr.getPointer();
8209     const CXXMethodDecl *MD =
8210         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8211     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8212     bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8213     // There should not be a mapper for a combined entry.
8214     if (HasBaseClass) {
8215       // OpenMP 5.2 148:21:
8216       // If the target construct is within a class non-static member function,
8217       // and a variable is an accessible data member of the object for which the
8218       // non-static member function is invoked, the variable is treated as
8219       // if the this[:1] expression had appeared in a map clause with a map-type
8220       // of tofrom.
8221       // Emit this[:1]
8222       CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8223       QualType Ty = MD->getFunctionObjectParameterType();
8224       llvm::Value *Size =
8225           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8226                                     /*isSigned=*/true);
8227       CombinedInfo.Sizes.push_back(Size);
8228     } else {
8229       CombinedInfo.Pointers.push_back(LB);
8230       // Size is (addr of {highest+1} element) - (addr of lowest element)
8231       llvm::Value *HB = HBAddr.getPointer();
8232       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8233           HBAddr.getElementType(), HB, /*Idx0=*/1);
8234       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8235       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8236       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8237       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8238                                                     /*isSigned=*/false);
8239       CombinedInfo.Sizes.push_back(Size);
8240     }
8241     CombinedInfo.Mappers.push_back(nullptr);
8242     // Map type is always TARGET_PARAM when generating info for captures.
8243     CombinedInfo.Types.push_back(
8244         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8245                         : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8246     // If any element has the present modifier, then make sure the runtime
8247     // doesn't attempt to allocate the struct.
8248     if (CurTypes.end() !=
8249         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8250           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8251               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8252         }))
8253       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8254     // Remove TARGET_PARAM flag from the first element
8255     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8256     // If any element has the ompx_hold modifier, then make sure the runtime
8257     // uses the hold reference count for the struct as a whole so that it won't
8258     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8259     // elements as well so the runtime knows which reference count to check
8260     // when determining whether it's time for device-to-host transfers of
8261     // individual elements.
8262     if (CurTypes.end() !=
8263         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8264           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8265               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8266         })) {
8267       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8268       for (auto &M : CurTypes)
8269         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8270     }
8271 
8272     // All other current entries will be MEMBER_OF the combined entry
8273     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8274     // 0xFFFF in the MEMBER_OF field).
8275     OpenMPOffloadMappingFlags MemberOfFlag =
8276         OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8277     for (auto &M : CurTypes)
8278       OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8279   }
8280 
8281   /// Generate all the base pointers, section pointers, sizes, map types, and
8282   /// mappers for the extracted mappable expressions (all included in \a
8283   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8284   /// CombinedInfo). Also, for each item that relates to a device pointer, a
8285   /// the device pointers info array.
8286   void generateAllInfo(
8287       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8288       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8289           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8290     assert(CurDir.is<const OMPExecutableDirective *>() &&
8291            "Expect an executable directive");
8292     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8293     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8294                               SkipVarSet);
8295   }
8296 
8297   /// Generate all the base pointers, section pointers, sizes, map types, and
8298   /// mappers for the extracted map clauses of a user-defined mapper (all
8299   /// included in \a CombinedInfo).
8300   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8301                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8302     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8303            "Expect a declare mapper directive");
8304     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8305     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8306                               OMPBuilder);
8307   }
8308 
8309   /// Emit capture info for variables captured by reference in lambdas.
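       ///
       /// A minimal sketch of the situation handled here (hypothetical user code):
       /// \code
       /// int X = 0;
       /// auto L = [&X]() { ++X; };
       /// #pragma omp target map(to: L)
       /// { L(); }
       /// \endcode
       /// Each by-reference capture is emitted as a PTR_AND_OBJ | LITERAL |
       /// MEMBER_OF | IMPLICIT entry whose base is the field inside the lambda
       /// object and whose pointer is the captured variable itself.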
8310   void generateInfoForLambdaCaptures(
8311       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8312       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8313     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8314     const auto *RD = VDType->getAsCXXRecordDecl();
8315     if (!RD || !RD->isLambda())
8316       return;
8317     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8318                    CGF.getContext().getDeclAlign(VD));
8319     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8320     llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8321     FieldDecl *ThisCapture = nullptr;
8322     RD->getCaptureFields(Captures, ThisCapture);
8323     if (ThisCapture) {
8324       LValue ThisLVal =
8325           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8326       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8327       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8328                                  VDLVal.getPointer(CGF));
8329       CombinedInfo.Exprs.push_back(VD);
8330       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8331       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8332       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8333       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8334       CombinedInfo.Sizes.push_back(
8335           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8336                                     CGF.Int64Ty, /*isSigned=*/true));
8337       CombinedInfo.Types.push_back(
8338           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8339           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8340           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8341           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8342       CombinedInfo.Mappers.push_back(nullptr);
8343     }
8344     for (const LambdaCapture &LC : RD->captures()) {
8345       if (!LC.capturesVariable())
8346         continue;
8347       const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8348       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8349         continue;
8350       auto It = Captures.find(VD);
8351       assert(It != Captures.end() && "Found lambda capture without field.");
8352       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8353       if (LC.getCaptureKind() == LCK_ByRef) {
8354         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8355         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8356                                    VDLVal.getPointer(CGF));
8357         CombinedInfo.Exprs.push_back(VD);
8358         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8359         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8360         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8361         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8362         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8363             CGF.getTypeSize(
8364                 VD->getType().getCanonicalType().getNonReferenceType()),
8365             CGF.Int64Ty, /*isSigned=*/true));
8366       } else {
8367         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8368         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8369                                    VDLVal.getPointer(CGF));
8370         CombinedInfo.Exprs.push_back(VD);
8371         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8372         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8373         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8374         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8375         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8376       }
8377       CombinedInfo.Types.push_back(
8378           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8379           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8380           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8381           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8382       CombinedInfo.Mappers.push_back(nullptr);
8383     }
8384   }
8385 
8386   /// Set correct indices for lambda captures.
8387   void adjustMemberOfForLambdaCaptures(
8388       llvm::OpenMPIRBuilder &OMPBuilder,
8389       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8390       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8391       MapFlagsArrayTy &Types) const {
8392     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8393       // Set correct member_of idx for all implicit lambda captures.
8394       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8395                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8396                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8397                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8398         continue;
8399       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8400       assert(BasePtr && "Unable to find base lambda address.");
8401       int TgtIdx = -1;
8402       for (unsigned J = I; J > 0; --J) {
8403         unsigned Idx = J - 1;
8404         if (Pointers[Idx] != BasePtr)
8405           continue;
8406         TgtIdx = Idx;
8407         break;
8408       }
8409       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8410       // All other current entries will be MEMBER_OF the combined entry
8411       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8412       // 0xFFFF in the MEMBER_OF field).
8413       OpenMPOffloadMappingFlags MemberOfFlag =
8414           OMPBuilder.getMemberOfFlag(TgtIdx);
8415       OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8416     }
8417   }
8418 
8419   /// Generate the base pointers, section pointers, sizes, map types, and
8420   /// mappers associated with a given capture (all included in \a CombinedInfo).
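       ///
       /// For the overlap analysis performed below, a sketch of the kind of
       /// input involved (hypothetical user code):
       /// \code
       /// struct S { int A; int B; } V;
       /// #pragma omp target map(tofrom: V) map(to: V.A)
       /// { V.B = V.A; }
       /// \endcode
       /// The component lists for V and V.A overlap, so the list for V is
       /// emitted with V.A recorded as an overlapped element instead of
       /// producing two independent entries.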
8421   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8422                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8423                               StructRangeInfoTy &PartialStruct) const {
8424     assert(!Cap->capturesVariableArrayType() &&
8425            "Not expecting to generate map info for a variable array type!");
8426 
8427     // We need to know when we are generating information for the first component.
8428     const ValueDecl *VD = Cap->capturesThis()
8429                               ? nullptr
8430                               : Cap->getCapturedVar()->getCanonicalDecl();
8431 
8432     // For map(to: lambda): skip it here; it is processed in
8433     // generateDefaultMapInfo.
8434     if (LambdasMap.count(VD))
8435       return;
8436 
8437     // If this declaration appears in an is_device_ptr clause, we just have to
8438     // pass the pointer by value. If it is a reference to a declaration, we just
8439     // pass its value.
8440     if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8441       CombinedInfo.Exprs.push_back(VD);
8442       CombinedInfo.BasePointers.emplace_back(Arg);
8443       CombinedInfo.DevicePtrDecls.emplace_back(VD);
8444       CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8445       CombinedInfo.Pointers.push_back(Arg);
8446       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8447           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8448           /*isSigned=*/true));
8449       CombinedInfo.Types.push_back(
8450           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8451           OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8452       CombinedInfo.Mappers.push_back(nullptr);
8453       return;
8454     }
8455 
8456     using MapData =
8457         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8458                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8459                    const ValueDecl *, const Expr *>;
8460     SmallVector<MapData, 4> DeclComponentLists;
8461     // For member fields listed in is_device_ptr, store them in
8462     // DeclComponentLists for generating the components info.
8463     static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8464     auto It = DevPointersMap.find(VD);
8465     if (It != DevPointersMap.end())
8466       for (const auto &MCL : It->second)
8467         DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8468                                         /*IsImplicit=*/true, nullptr,
8469                                         nullptr);
8470     auto I = HasDevAddrsMap.find(VD);
8471     if (I != HasDevAddrsMap.end())
8472       for (const auto &MCL : I->second)
8473         DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8474                                         /*IsImplicit=*/true, nullptr,
8475                                         nullptr);
8476     assert(CurDir.is<const OMPExecutableDirective *>() &&
8477            "Expect an executable directive");
8478     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8479     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8480       const auto *EI = C->getVarRefs().begin();
8481       for (const auto L : C->decl_component_lists(VD)) {
8482         const ValueDecl *VDecl, *Mapper;
8483         // The Expression is not correct if the mapping is implicit
8484         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8485         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8486         std::tie(VDecl, Components, Mapper) = L;
8487         assert(VDecl == VD && "We got information for the wrong declaration??");
8488         assert(!Components.empty() &&
8489                "Not expecting declaration with no component lists.");
8490         DeclComponentLists.emplace_back(Components, C->getMapType(),
8491                                         C->getMapTypeModifiers(),
8492                                         C->isImplicit(), Mapper, E);
8493         ++EI;
8494       }
8495     }
8496     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8497                                              const MapData &RHS) {
8498       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8499       OpenMPMapClauseKind MapType = std::get<1>(RHS);
8500       bool HasPresent =
8501           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8502       bool HasAllocs = MapType == OMPC_MAP_alloc;
8503       MapModifiers = std::get<2>(RHS);
8504       MapType = std::get<1>(LHS);
8505       bool HasPresentR =
8506           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8507       bool HasAllocsR = MapType == OMPC_MAP_alloc;
8508       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8509     });
8510 
8511     // Find overlapping elements (including the offset from the base element).
8512     llvm::SmallDenseMap<
8513         const MapData *,
8514         llvm::SmallVector<
8515             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8516         4>
8517         OverlappedData;
8518     size_t Count = 0;
8519     for (const MapData &L : DeclComponentLists) {
8520       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8521       OpenMPMapClauseKind MapType;
8522       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8523       bool IsImplicit;
8524       const ValueDecl *Mapper;
8525       const Expr *VarRef;
8526       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8527           L;
8528       ++Count;
8529       for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8530         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8531         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8532                  VarRef) = L1;
8533         auto CI = Components.rbegin();
8534         auto CE = Components.rend();
8535         auto SI = Components1.rbegin();
8536         auto SE = Components1.rend();
8537         for (; CI != CE && SI != SE; ++CI, ++SI) {
8538           if (CI->getAssociatedExpression()->getStmtClass() !=
8539               SI->getAssociatedExpression()->getStmtClass())
8540             break;
8541           // Are we dealing with different variables/fields?
8542           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8543             break;
8544         }
8545         // An overlap is found if, for at least one of the two lists, we
8546         // reached the head of the components list.
8547         if (CI == CE || SI == SE) {
8548           // Ignore it if it is the same component.
8549           if (CI == CE && SI == SE)
8550             continue;
8551           const auto It = (SI == SE) ? CI : SI;
8552           // If one component is a pointer and the other is a kind of
8553           // dereference of this pointer (array subscript, section,
8554           // dereference, etc.), it is not an overlap.
8555           // The same holds if one component is a base and the other is a
8556           // dereferenced pointer MemberExpr with the same base.
8557           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8558               (std::prev(It)->getAssociatedDeclaration() &&
8559                std::prev(It)
8560                    ->getAssociatedDeclaration()
8561                    ->getType()
8562                    ->isPointerType()) ||
8563               (It->getAssociatedDeclaration() &&
8564                It->getAssociatedDeclaration()->getType()->isPointerType() &&
8565                std::next(It) != CE && std::next(It) != SE))
8566             continue;
8567           const MapData &BaseData = CI == CE ? L : L1;
8568           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8569               SI == SE ? Components : Components1;
8570           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8571           OverlappedElements.getSecond().push_back(SubData);
8572         }
8573       }
8574     }
8575     // Sort the overlapped elements for each item.
8576     llvm::SmallVector<const FieldDecl *, 4> Layout;
8577     if (!OverlappedData.empty()) {
8578       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8579       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8580       while (BaseType != OrigType) {
8581         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8582         OrigType = BaseType->getPointeeOrArrayElementType();
8583       }
8584 
8585       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8586         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8587       else {
8588         const auto *RD = BaseType->getAsRecordDecl();
8589         Layout.append(RD->field_begin(), RD->field_end());
8590       }
8591     }
8592     for (auto &Pair : OverlappedData) {
8593       llvm::stable_sort(
8594           Pair.getSecond(),
8595           [&Layout](
8596               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8597               OMPClauseMappableExprCommon::MappableExprComponentListRef
8598                   Second) {
8599             auto CI = First.rbegin();
8600             auto CE = First.rend();
8601             auto SI = Second.rbegin();
8602             auto SE = Second.rend();
8603             for (; CI != CE && SI != SE; ++CI, ++SI) {
8604               if (CI->getAssociatedExpression()->getStmtClass() !=
8605                   SI->getAssociatedExpression()->getStmtClass())
8606                 break;
8607               // Are we dealing with different variables/fields?
8608               if (CI->getAssociatedDeclaration() !=
8609                   SI->getAssociatedDeclaration())
8610                 break;
8611             }
8612 
8613             // Lists contain the same elements.
8614             if (CI == CE && SI == SE)
8615               return false;
8616 
8617             // A list with fewer elements is less than a list with more elements.
8618             if (CI == CE || SI == SE)
8619               return CI == CE;
8620 
8621             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8622             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8623             if (FD1->getParent() == FD2->getParent())
8624               return FD1->getFieldIndex() < FD2->getFieldIndex();
8625             const auto *It =
8626                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8627                   return FD == FD1 || FD == FD2;
8628                 });
8629             return *It == FD1;
8630           });
8631     }
8632 
8633     // Associated with a capture, because the mapping flags depend on it.
8634     // First go through all of the elements that have overlapped elements.
8635     bool IsFirstComponentList = true;
8636     MapCombinedInfoTy StructBaseCombinedInfo;
8637     for (const auto &Pair : OverlappedData) {
8638       const MapData &L = *Pair.getFirst();
8639       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8640       OpenMPMapClauseKind MapType;
8641       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8642       bool IsImplicit;
8643       const ValueDecl *Mapper;
8644       const Expr *VarRef;
8645       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8646           L;
8647       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8648           OverlappedComponents = Pair.getSecond();
8649       generateInfoForComponentList(
8650           MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8651           StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8652           IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8653           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8654       IsFirstComponentList = false;
8655     }
8656     // Go through other elements without overlapped elements.
8657     for (const MapData &L : DeclComponentLists) {
8658       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8659       OpenMPMapClauseKind MapType;
8660       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8661       bool IsImplicit;
8662       const ValueDecl *Mapper;
8663       const Expr *VarRef;
8664       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8665           L;
8666       auto It = OverlappedData.find(&L);
8667       if (It == OverlappedData.end())
8668         generateInfoForComponentList(
8669             MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8670             StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8671             IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8672             /*ForDeviceAddr=*/false, VD, VarRef);
8673       IsFirstComponentList = false;
8674     }
8675   }
8676 
8677   /// Generate the default map information for a given capture \a CI,
8678   /// record field declaration \a RI and captured value \a CV.
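       ///
       /// A sketch of the defaults implemented here (hypothetical user code):
       /// \code
       /// int Scalar = 0; // By-copy capture: LITERAL | TARGET_PARAM.
       /// int Arr[8];     // By-reference capture: aggregate, defaults to tofrom.
       /// #pragma omp target
       /// { Arr[0] = Scalar; }
       /// \endcode
       /// Captures of 'this' default to tofrom, non-pointer by-copy captures are
       /// passed as literals, and by-reference captures use the modifiers
       /// computed by getMapModifiersForPrivateClauses.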
8679   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8680                               const FieldDecl &RI, llvm::Value *CV,
8681                               MapCombinedInfoTy &CombinedInfo) const {
8682     bool IsImplicit = true;
8683     // Do the default mapping.
8684     if (CI.capturesThis()) {
8685       CombinedInfo.Exprs.push_back(nullptr);
8686       CombinedInfo.BasePointers.push_back(CV);
8687       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8688       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8689       CombinedInfo.Pointers.push_back(CV);
8690       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8691       CombinedInfo.Sizes.push_back(
8692           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8693                                     CGF.Int64Ty, /*isSigned=*/true));
8694       // Default map type.
8695       CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8696                                    OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8697     } else if (CI.capturesVariableByCopy()) {
8698       const VarDecl *VD = CI.getCapturedVar();
8699       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8700       CombinedInfo.BasePointers.push_back(CV);
8701       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8702       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8703       CombinedInfo.Pointers.push_back(CV);
8704       if (!RI.getType()->isAnyPointerType()) {
8705         // Captures passed by value that are not pointers have to be
8706         // signaled to the runtime as literals.
8707         CombinedInfo.Types.push_back(
8708             OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8709         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8710             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8711       } else {
8712         // Pointers are implicitly mapped with a zero size and no flags
8713         // (other than the first map, which is added for all implicit maps).
8714         CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8715         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8716       }
8717       auto I = FirstPrivateDecls.find(VD);
8718       if (I != FirstPrivateDecls.end())
8719         IsImplicit = I->getSecond();
8720     } else {
8721       assert(CI.capturesVariable() && "Expected captured reference.");
8722       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8723       QualType ElementType = PtrTy->getPointeeType();
8724       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8725           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8726       // The default map type for a scalar/complex type is 'to' because by
8727       // default the value doesn't have to be retrieved. For an aggregate
8728       // type, the default is 'tofrom'.
8729       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8730       const VarDecl *VD = CI.getCapturedVar();
8731       auto I = FirstPrivateDecls.find(VD);
8732       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8733       CombinedInfo.BasePointers.push_back(CV);
8734       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8735       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8736       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8737         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8738             CV, ElementType, CGF.getContext().getDeclAlign(VD),
8739             AlignmentSource::Decl));
8740         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8741       } else {
8742         CombinedInfo.Pointers.push_back(CV);
8743       }
8744       if (I != FirstPrivateDecls.end())
8745         IsImplicit = I->getSecond();
8746     }
8747     // Every default map produces a single argument, which is a target parameter.
8748     CombinedInfo.Types.back() |=
8749         OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8750 
8751     // Add flag stating this is an implicit map.
8752     if (IsImplicit)
8753       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8754 
8755     // No user-defined mapper for default mapping.
8756     CombinedInfo.Mappers.push_back(nullptr);
8757   }
8758 };
8759 } // anonymous namespace
8760 
8761 // Try to extract the base declaration from a `this->x` expression if possible.
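     // E.g., for a (hypothetical) clause 'map(tofrom: this->X[0:N])' the base of
     // the array section is the MemberExpr 'this->X', so the FieldDecl for X is
     // returned; any other shape of expression yields nullptr.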
8762 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8763   if (!E)
8764     return nullptr;
8765 
8766   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8767     if (const MemberExpr *ME =
8768             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8769       return ME->getMemberDecl();
8770   return nullptr;
8771 }
8772 
8773 /// Emit a string constant containing the names of the values mapped to the
8774 /// offloading runtime library.
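     ///
     /// As a sketch (hypothetical input): for 'map(tofrom: A[0:N])' at
     /// foo.c:12:9, the pretty-printed expression "A[0:N]" plus the presumed
     /// file name, line, and column are encoded via getOrCreateSrcLocStr so the
     /// runtime can name the mapping in its diagnostics.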
8775 llvm::Constant *
8776 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8777                        MappableExprsHandler::MappingExprInfo &MapExprs) {
8778 
8779   uint32_t SrcLocStrSize;
8780   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8781     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8782 
8783   SourceLocation Loc;
8784   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8785     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8786       Loc = VD->getLocation();
8787     else
8788       Loc = MapExprs.getMapExpr()->getExprLoc();
8789   } else {
8790     Loc = MapExprs.getMapDecl()->getLocation();
8791   }
8792 
8793   std::string ExprName;
8794   if (MapExprs.getMapExpr()) {
8795     PrintingPolicy P(CGF.getContext().getLangOpts());
8796     llvm::raw_string_ostream OS(ExprName);
8797     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8798     OS.flush();
8799   } else {
8800     ExprName = MapExprs.getMapDecl()->getNameAsString();
8801   }
8802 
8803   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8804   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8805                                          PLoc.getLine(), PLoc.getColumn(),
8806                                          SrcLocStrSize);
8807 }
8808 
8809 /// Emit the arrays used to pass the captures and map information to the
8810 /// offloading runtime library. If there is no map or capture information,
8811 /// return nullptr by reference.
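     ///
     /// Conceptually (a sketch of the shape, not the exact ABI produced by the
     /// OpenMPIRBuilder), for N mapped components this materializes:
     /// \code
     /// void    *offload_baseptrs[N];  // CombinedInfo.BasePointers
     /// void    *offload_ptrs[N];      // CombinedInfo.Pointers
     /// int64_t  offload_sizes[N];     // CombinedInfo.Sizes
     /// int64_t  offload_maptypes[N];  // CombinedInfo.Types
     /// void    *offload_mappers[N];   // CombinedInfo.Mappers (if any)
     /// \endcode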
8812 static void emitOffloadingArrays(
8813     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8814     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8815     bool IsNonContiguous = false) {
8816   CodeGenModule &CGM = CGF.CGM;
8817 
8818   // Reset the array information.
8819   Info.clearArrayInfo();
8820   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8821 
8822   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8823   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8824                          CGF.AllocaInsertPt->getIterator());
8825   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8826                           CGF.Builder.GetInsertPoint());
8827 
8828   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8829     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8830   };
8831   if (CGM.getCodeGenOpts().getDebugInfo() !=
8832       llvm::codegenoptions::NoDebugInfo) {
8833     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8834     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8835                     FillInfoMap);
8836   }
8837 
8838   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8839     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8840       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8841     }
8842   };
8843 
8844   auto CustomMapperCB = [&](unsigned int I) {
8845     llvm::Value *MFunc = nullptr;
8846     if (CombinedInfo.Mappers[I]) {
8847       Info.HasMapper = true;
8848       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8849           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8850     }
8851     return MFunc;
8852   };
8853   OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8854                                   IsNonContiguous, DeviceAddrCB,
8855                                   CustomMapperCB);
8856 }
8857 
8858 /// Check for inner distribute directive.
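     ///
     /// A sketch of the nesting being looked through (hypothetical user code):
     /// \code
     /// #pragma omp target
     /// #pragma omp teams
     /// #pragma omp distribute parallel for
     /// for (int I = 0; I < N; ++I)
     ///   Body(I);
     /// \endcode
     /// Here the inner 'distribute parallel for' directive is returned.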
8859 static const OMPExecutableDirective *
8860 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8861   const auto *CS = D.getInnermostCapturedStmt();
8862   const auto *Body =
8863       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8864   const Stmt *ChildStmt =
8865       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8866 
8867   if (const auto *NestedDir =
8868           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8869     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8870     switch (D.getDirectiveKind()) {
8871     case OMPD_target:
8872       // For now, just treat 'target teams loop' as if it's distributed.
8873       if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8874         return NestedDir;
8875       if (DKind == OMPD_teams) {
8876         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8877             /*IgnoreCaptured=*/true);
8878         if (!Body)
8879           return nullptr;
8880         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8881         if (const auto *NND =
8882                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8883           DKind = NND->getDirectiveKind();
8884           if (isOpenMPDistributeDirective(DKind))
8885             return NND;
8886         }
8887       }
8888       return nullptr;
8889     case OMPD_target_teams:
8890       if (isOpenMPDistributeDirective(DKind))
8891         return NestedDir;
8892       return nullptr;
8893     case OMPD_target_parallel:
8894     case OMPD_target_simd:
8895     case OMPD_target_parallel_for:
8896     case OMPD_target_parallel_for_simd:
8897       return nullptr;
8898     case OMPD_target_teams_distribute:
8899     case OMPD_target_teams_distribute_simd:
8900     case OMPD_target_teams_distribute_parallel_for:
8901     case OMPD_target_teams_distribute_parallel_for_simd:
8902     case OMPD_parallel:
8903     case OMPD_for:
8904     case OMPD_parallel_for:
8905     case OMPD_parallel_master:
8906     case OMPD_parallel_sections:
8907     case OMPD_for_simd:
8908     case OMPD_parallel_for_simd:
8909     case OMPD_cancel:
8910     case OMPD_cancellation_point:
8911     case OMPD_ordered:
8912     case OMPD_threadprivate:
8913     case OMPD_allocate:
8914     case OMPD_task:
8915     case OMPD_simd:
8916     case OMPD_tile:
8917     case OMPD_unroll:
8918     case OMPD_sections:
8919     case OMPD_section:
8920     case OMPD_single:
8921     case OMPD_master:
8922     case OMPD_critical:
8923     case OMPD_taskyield:
8924     case OMPD_barrier:
8925     case OMPD_taskwait:
8926     case OMPD_taskgroup:
8927     case OMPD_atomic:
8928     case OMPD_flush:
8929     case OMPD_depobj:
8930     case OMPD_scan:
8931     case OMPD_teams:
8932     case OMPD_target_data:
8933     case OMPD_target_exit_data:
8934     case OMPD_target_enter_data:
8935     case OMPD_distribute:
8936     case OMPD_distribute_simd:
8937     case OMPD_distribute_parallel_for:
8938     case OMPD_distribute_parallel_for_simd:
8939     case OMPD_teams_distribute:
8940     case OMPD_teams_distribute_simd:
8941     case OMPD_teams_distribute_parallel_for:
8942     case OMPD_teams_distribute_parallel_for_simd:
8943     case OMPD_target_update:
8944     case OMPD_declare_simd:
8945     case OMPD_declare_variant:
8946     case OMPD_begin_declare_variant:
8947     case OMPD_end_declare_variant:
8948     case OMPD_declare_target:
8949     case OMPD_end_declare_target:
8950     case OMPD_declare_reduction:
8951     case OMPD_declare_mapper:
8952     case OMPD_taskloop:
8953     case OMPD_taskloop_simd:
8954     case OMPD_master_taskloop:
8955     case OMPD_master_taskloop_simd:
8956     case OMPD_parallel_master_taskloop:
8957     case OMPD_parallel_master_taskloop_simd:
8958     case OMPD_requires:
8959     case OMPD_metadirective:
8960     case OMPD_unknown:
8961     default:
8962       llvm_unreachable("Unexpected directive.");
8963     }
8964   }
8965 
8966   return nullptr;
8967 }
8968 
8969 /// Emit the user-defined mapper function. The code generation follows the
8970 /// pattern in the example below.
8971 /// \code
8972 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8973 ///                                           void *base, void *begin,
8974 ///                                           int64_t size, int64_t type,
8975 ///                                           void *name = nullptr) {
8976 ///   // Allocate space for an array section first or add a base/begin for
8977 ///   // pointer dereference.
8978 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
8979 ///       !maptype.IsDelete)
8980 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8981 ///                                 size*sizeof(Ty), clearToFromMember(type));
8982 ///   // Map members.
8983 ///   for (unsigned i = 0; i < size; i++) {
8984 ///     // For each component specified by this mapper:
8985 ///     for (auto c : begin[i]->all_components) {
8986 ///       if (c.hasMapper())
8987 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8988 ///                       c.arg_type, c.arg_name);
8989 ///       else
8990 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8991 ///                                     c.arg_begin, c.arg_size, c.arg_type,
8992 ///                                     c.arg_name);
8993 ///     }
8994 ///   }
8995 ///   // Delete the array section.
8996 ///   if (size > 1 && maptype.IsDelete)
8997 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8998 ///                                 size*sizeof(Ty), clearToFromMember(type));
8999 /// }
9000 /// \endcode
9001 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9002                                             CodeGenFunction *CGF) {
9003   if (UDMMap.count(D) > 0)
9004     return;
9005   ASTContext &C = CGM.getContext();
9006   QualType Ty = D->getType();
9007   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9008   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9009   auto *MapperVarDecl =
9010       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9011   SourceLocation Loc = D->getLocation();
9012   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9013   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9014 
9015   // Prepare mapper function arguments and attributes.
9016   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9017                               C.VoidPtrTy, ImplicitParamKind::Other);
9018   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9019                             ImplicitParamKind::Other);
9020   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9021                              C.VoidPtrTy, ImplicitParamKind::Other);
9022   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9023                             ImplicitParamKind::Other);
9024   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9025                             ImplicitParamKind::Other);
9026   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9027                             ImplicitParamKind::Other);
9028   FunctionArgList Args;
9029   Args.push_back(&HandleArg);
9030   Args.push_back(&BaseArg);
9031   Args.push_back(&BeginArg);
9032   Args.push_back(&SizeArg);
9033   Args.push_back(&TypeArg);
9034   Args.push_back(&NameArg);
9035   const CGFunctionInfo &FnInfo =
9036       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9037   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9038   SmallString<64> TyStr;
9039   llvm::raw_svector_ostream Out(TyStr);
9040   CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9041   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9042   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9043                                     Name, &CGM.getModule());
9044   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9045   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9046   // Start the mapper function code generation.
9047   CodeGenFunction MapperCGF(CGM);
9048   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9049   // Compute the starting and end addresses of array elements.
9050   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9051       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9052       C.getPointerType(Int64Ty), Loc);
9053   // Prepare common arguments for array initialization and deletion.
9054   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9055       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9056       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9057   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9058       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9059       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9060   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9061       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9062       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9063   // Convert the size in bytes into the number of array elements.
9064   Size = MapperCGF.Builder.CreateExactUDiv(
9065       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9066   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9067       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9068   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9069   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9070       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9071       C.getPointerType(Int64Ty), Loc);
9072   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9073       MapperCGF.GetAddrOfLocalVar(&NameArg),
9074       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9075 
9076   // Emit array initialization if this is an array section and \p MapType
9077   // indicates that memory allocation is required.
9078   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9079   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9080                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9081 
9082   // Emit a for loop to iterate through SizeArg elements and map all of them.
9083 
9084   // Emit the loop header block.
9085   MapperCGF.EmitBlock(HeadBB);
9086   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9087   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9088   // Evaluate whether the initial condition is satisfied.
9089   llvm::Value *IsEmpty =
9090       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9091   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9092   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9093 
9094   // Emit the loop body block.
9095   MapperCGF.EmitBlock(BodyBB);
9096   llvm::BasicBlock *LastBB = BodyBB;
9097   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9098       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9099   PtrPHI->addIncoming(PtrBegin, EntryBB);
9100   Address PtrCurrent(PtrPHI, ElemTy,
9101                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9102                          .getAlignment()
9103                          .alignmentOfArrayElement(ElementSize));
9104   // Privatize the mapper's declared variable to be the current array element.
9105   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9106   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9107   (void)Scope.Privatize();
9108 
9109   // Get map clause information. Fill up the arrays with all mapped variables.
9110   MappableExprsHandler::MapCombinedInfoTy Info;
9111   MappableExprsHandler MEHandler(*D, MapperCGF);
9112   MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9113 
9114   // Call the runtime API __tgt_mapper_num_components to get the number of
9115   // pre-existing components.
9116   llvm::Value *OffloadingArgs[] = {Handle};
9117   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9118       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9119                                             OMPRTL___tgt_mapper_num_components),
9120       OffloadingArgs);
9121   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9122       PreviousSize,
9123       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9124 
9125   // Fill up the runtime mapper handle for all components.
9126   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9127     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9128         Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9129     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9130         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9131     llvm::Value *CurSizeArg = Info.Sizes[I];
9132     llvm::Value *CurNameArg =
9133         (CGM.getCodeGenOpts().getDebugInfo() ==
9134          llvm::codegenoptions::NoDebugInfo)
9135             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9136             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9137 
9138     // Extract the MEMBER_OF field from the map type.
9139     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9140         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9141             Info.Types[I]));
9142     llvm::Value *MemberMapType =
9143         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9144 
9145     // Combine the map type inherited from user-defined mapper with that
9146     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9147     // bits of the \a MapType, which is the input argument of the mapper
9148     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9149     // bits of MemberMapType.
9150     // [OpenMP 5.0], 1.2.6. map-type decay.
9151     //        | alloc |  to   | from  | tofrom | release | delete
9152     // ----------------------------------------------------------
9153     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9154     // to     | alloc |  to   | alloc |   to   | release | delete
9155     // from   | alloc | alloc | from  |  from  | release | delete
9156     // tofrom | alloc |  to   | from  | tofrom | release | delete
9157     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9158         MapType,
9159         MapperCGF.Builder.getInt64(
9160             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9161                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9162                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9163     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9164     llvm::BasicBlock *AllocElseBB =
9165         MapperCGF.createBasicBlock("omp.type.alloc.else");
9166     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9167     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9168     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9169     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9170     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9171     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9172     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9173     MapperCGF.EmitBlock(AllocBB);
9174     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9175         MemberMapType,
9176         MapperCGF.Builder.getInt64(
9177             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9178                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9179                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9180     MapperCGF.Builder.CreateBr(EndBB);
9181     MapperCGF.EmitBlock(AllocElseBB);
9182     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9183         LeftToFrom,
9184         MapperCGF.Builder.getInt64(
9185             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9186                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9187     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9188     // In case of to, clear OMP_MAP_FROM.
9189     MapperCGF.EmitBlock(ToBB);
9190     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9191         MemberMapType,
9192         MapperCGF.Builder.getInt64(
9193             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9194                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9195     MapperCGF.Builder.CreateBr(EndBB);
9196     MapperCGF.EmitBlock(ToElseBB);
9197     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9198         LeftToFrom,
9199         MapperCGF.Builder.getInt64(
9200             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9201                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9202     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9203     // In case of from, clear OMP_MAP_TO.
9204     MapperCGF.EmitBlock(FromBB);
9205     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9206         MemberMapType,
9207         MapperCGF.Builder.getInt64(
9208             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9209                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9210     // In case of tofrom, do nothing.
9211     MapperCGF.EmitBlock(EndBB);
9212     LastBB = EndBB;
9213     llvm::PHINode *CurMapType =
9214         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9215     CurMapType->addIncoming(AllocMapType, AllocBB);
9216     CurMapType->addIncoming(ToMapType, ToBB);
9217     CurMapType->addIncoming(FromMapType, FromBB);
9218     CurMapType->addIncoming(MemberMapType, ToElseBB);
9219 
9220     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9221                                      CurSizeArg, CurMapType, CurNameArg};
9222     if (Info.Mappers[I]) {
9223       // Call the corresponding mapper function.
9224       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9225           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9226       assert(MapperFunc && "Expect a valid mapper function to be available.");
9227       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9228     } else {
9229       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9230       // data structure.
9231       MapperCGF.EmitRuntimeCall(
9232           OMPBuilder.getOrCreateRuntimeFunction(
9233               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9234           OffloadingArgs);
9235     }
9236   }
9237 
9238   // Update the pointer to point to the next element that needs to be mapped,
9239   // and check whether we have mapped all elements.
9240   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9241       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9242   PtrPHI->addIncoming(PtrNext, LastBB);
9243   llvm::Value *IsDone =
9244       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9245   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9246   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9247 
9248   MapperCGF.EmitBlock(ExitBB);
9249   // Emit array deletion if this is an array section and \p MapType indicates
9250   // that deletion is required.
9251   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9252                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9253 
9254   // Emit the function exit block.
9255   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9256   MapperCGF.FinishFunction();
9257   UDMMap.try_emplace(D, Fn);
9258   if (CGF) {
9259     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9260     Decls.second.push_back(D);
9261   }
9262 }
9263 
9264 /// Emit the array initialization or deletion portion for user-defined mapper
9265 /// code generation. First, it evaluates whether an array section is mapped and
9266 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9267 /// true, and \a MapType indicates to not delete this array, array
9268 /// initialization code is generated. If \a IsInit is false, and \a MapType
9269 /// indicates to delete this array, array deletion code is generated.
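     ///
     /// In pseudo-code (mirroring the \code block before emitUserDefinedMapper):
     /// \code
     /// if (IsInit  && (Size > 1 || (Base != Begin && IsPtrAndObj)) && !IsDelete)
     ///   __tgt_push_mapper_component(Handle, Base, Begin, Size * sizeof(Ty),
     ///                               (Type & ~(TO | FROM)) | IMPLICIT, Name);
     /// if (!IsInit && Size > 1 && IsDelete)
     ///   __tgt_push_mapper_component(Handle, Base, Begin, Size * sizeof(Ty),
     ///                               (Type & ~(TO | FROM)) | IMPLICIT, Name);
     /// \endcode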
9270 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9271     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9272     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9273     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9274     bool IsInit) {
9275   StringRef Prefix = IsInit ? ".init" : ".del";
9276 
9277   // Evaluate if this is an array section.
9278   llvm::BasicBlock *BodyBB =
9279       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9280   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9281       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9282   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9283       MapType,
9284       MapperCGF.Builder.getInt64(
9285           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9286               OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9287   llvm::Value *DeleteCond;
9288   llvm::Value *Cond;
9289   if (IsInit) {
9290     // base != begin?
9291     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9292     // IsPtrAndObj?
9293     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9294         MapType,
9295         MapperCGF.Builder.getInt64(
9296             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9297                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9298     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9299     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9300     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9301     DeleteCond = MapperCGF.Builder.CreateIsNull(
9302         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9303   } else {
9304     Cond = IsArray;
9305     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9306         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9307   }
9308   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9309   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9310 
9311   MapperCGF.EmitBlock(BodyBB);
9312   // Get the array size by multiplying element size and element number (i.e., \p
9313   // Size).
9314   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9315       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9316   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9317   // memory allocation/deletion purpose only.
9318   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9319       MapType,
9320       MapperCGF.Builder.getInt64(
9321           ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9322               OpenMPOffloadMappingFlags::OMP_MAP_TO |
9323               OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9324   MapTypeArg = MapperCGF.Builder.CreateOr(
9325       MapTypeArg,
9326       MapperCGF.Builder.getInt64(
9327           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9328               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9329 
9330   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9331   // data structure.
9332   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9333                                    ArraySize, MapTypeArg, MapName};
9334   MapperCGF.EmitRuntimeCall(
9335       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9336                                             OMPRTL___tgt_push_mapper_component),
9337       OffloadingArgs);
9338 }
9339 
9340 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9341     const OMPDeclareMapperDecl *D) {
9342   auto I = UDMMap.find(D);
9343   if (I != UDMMap.end())
9344     return I->second;
9345   emitUserDefinedMapper(D);
9346   return UDMMap.lookup(D);
9347 }
9348 
9349 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9350     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9351     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9352                                      const OMPLoopDirective &D)>
9353         SizeEmitter) {
9354   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9355   const OMPExecutableDirective *TD = &D;
9356   // Get nested teams distribute kind directive, if any.
9357   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9358       Kind != OMPD_target_teams_loop)
9359     TD = getNestedDistributeDirective(CGM.getContext(), D);
9360   if (!TD)
9361     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9362 
9363   const auto *LD = cast<OMPLoopDirective>(TD);
9364   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9365     return NumIterations;
9366   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9367 }
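
// For example, '#pragma omp target teams distribute parallel for' is itself
// a teams-distribute loop directive, so SizeEmitter can compute the trip
// count directly; for a plain '#pragma omp target' enclosing a nested
// 'teams distribute', the nested directive is located first. If no suitable
// loop directive is found, a trip count of 0 is returned.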
9368 
9369 static void
9370 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9371                        const OMPExecutableDirective &D,
9372                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9373                        bool RequiresOuterTask, const CapturedStmt &CS,
9374                        bool OffloadingMandatory, CodeGenFunction &CGF) {
9375   if (OffloadingMandatory) {
9376     CGF.Builder.CreateUnreachable();
9377   } else {
9378     if (RequiresOuterTask) {
9379       CapturedVars.clear();
9380       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9381     }
9382     OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9383                                          CapturedVars);
9384   }
9385 }
9386 
9387 static llvm::Value *emitDeviceID(
9388     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9389     CodeGenFunction &CGF) {
9390   // Emit device ID if any.
9391   llvm::Value *DeviceID;
9392   if (Device.getPointer()) {
9393     assert((Device.getInt() == OMPC_DEVICE_unknown ||
9394             Device.getInt() == OMPC_DEVICE_device_num) &&
9395            "Expected device_num modifier.");
9396     llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9397     DeviceID =
9398         CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9399   } else {
9400     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9401   }
9402   return DeviceID;
9403 }
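
// E.g. 'device(dev)' yields the value of 'dev' sign-extended to i64, while
// the absence of a device clause yields the OMP_DEVICEID_UNDEF sentinel.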
9404 
9405 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9406                                        CodeGenFunction &CGF) {
9407   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9408 
9409   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9410     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9411     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9412         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9413     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9414                                              /*isSigned=*/false);
9415   }
9416   return DynCGroupMem;
9417 }
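
// E.g. 'ompx_dyn_cgroup_mem(1024)' yields an i32 1024 as the dynamic
// contention-group ("cgroup") memory size; without the clause the size is 0.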
9418 
9419 static void emitTargetCallKernelLaunch(
9420     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9421     const OMPExecutableDirective &D,
9422     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9423     const CapturedStmt &CS, bool OffloadingMandatory,
9424     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9425     llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9426     llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9427     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9428                                      const OMPLoopDirective &D)>
9429         SizeEmitter,
9430     CodeGenFunction &CGF, CodeGenModule &CGM) {
9431   llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9432 
9433   // Fill up the arrays with all the captured variables.
9434   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9435 
9436   // Get mappable expression information.
9437   MappableExprsHandler MEHandler(D, CGF);
9438   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9439   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9440 
9441   auto RI = CS.getCapturedRecordDecl()->field_begin();
9442   auto *CV = CapturedVars.begin();
9443   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9444                                             CE = CS.capture_end();
9445        CI != CE; ++CI, ++RI, ++CV) {
9446     MappableExprsHandler::MapCombinedInfoTy CurInfo;
9447     MappableExprsHandler::StructRangeInfoTy PartialStruct;
9448 
9449     // VLA sizes are passed to the outlined region by copy and do not have map
9450     // information associated.
9451     if (CI->capturesVariableArrayType()) {
9452       CurInfo.Exprs.push_back(nullptr);
9453       CurInfo.BasePointers.push_back(*CV);
9454       CurInfo.DevicePtrDecls.push_back(nullptr);
9455       CurInfo.DevicePointers.push_back(
9456           MappableExprsHandler::DeviceInfoTy::None);
9457       CurInfo.Pointers.push_back(*CV);
9458       CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9459           CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9460       // Copy to the device as an argument. No need to retrieve it.
9461       CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9462                               OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9463                               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9464       CurInfo.Mappers.push_back(nullptr);
9465     } else {
9466       // If we have any information in the map clause, we use it, otherwise we
9467       // just do a default mapping.
9468       MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9469       if (!CI->capturesThis())
9470         MappedVarSet.insert(CI->getCapturedVar());
9471       else
9472         MappedVarSet.insert(nullptr);
9473       if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9474         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9475       // Generate correct mapping for variables captured by reference in
9476       // lambdas.
9477       if (CI->capturesVariable())
9478         MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9479                                                 CurInfo, LambdaPointers);
9480     }
9481     // We expect to have at least an element of information for this capture.
9482     assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9483            "Non-existing map pointer for capture!");
9484     assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9485            CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9486            CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9487            CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9488            "Inconsistent map information sizes!");
9489 
9490     // If there is an entry in PartialStruct it means we have a struct with
9491     // individual members mapped. Emit an extra combined entry.
9492     if (PartialStruct.Base.isValid()) {
9493       CombinedInfo.append(PartialStruct.PreliminaryMapData);
9494       MEHandler.emitCombinedEntry(
9495           CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9496           OMPBuilder, nullptr,
9497           !PartialStruct.PreliminaryMapData.BasePointers.empty());
9498     }
9499 
9500     // We need to append the results of this capture to what we already have.
9501     CombinedInfo.append(CurInfo);
9502   }
9503   // Adjust MEMBER_OF flags for the lambdas captures.
9504   MEHandler.adjustMemberOfForLambdaCaptures(
9505       OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9506       CombinedInfo.Pointers, CombinedInfo.Types);
9507   // Map any list items in a map clause that were not captured, because they
9508   // weren't referenced within the construct.
9509   MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9510 
9511   CGOpenMPRuntime::TargetDataInfo Info;
9512   // Fill up the arrays and create the arguments.
9513   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9514   bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9515                    llvm::codegenoptions::NoDebugInfo;
9516   OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9517                                           EmitDebug,
9518                                           /*ForEndCall=*/false);
9519 
9520   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9521   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9522                                         CGF.VoidPtrTy, CGM.getPointerAlign());
9523   InputInfo.PointersArray =
9524       Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9525   InputInfo.SizesArray =
9526       Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9527   InputInfo.MappersArray =
9528       Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9529   MapTypesArray = Info.RTArgs.MapTypesArray;
9530   MapNamesArray = Info.RTArgs.MapNamesArray;
9531 
9532   auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9533                     RequiresOuterTask, &CS, OffloadingMandatory, Device,
9534                     OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9535                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9536     bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9537 
9538     if (IsReverseOffloading) {
9539       // Reverse offloading is not supported, so just execute on the host.
9540       // FIXME: This fallback solution is incorrect since it ignores the
9541       // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9542       // assert here and ensure SEMA emits an error.
9543       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9544                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9545       return;
9546     }
9547 
9548     bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9549     unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9550 
9551     llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
9552     llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
9553     llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
9554     llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();
9555 
9556     auto &&EmitTargetCallFallbackCB =
9557         [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9558          OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9559         -> llvm::OpenMPIRBuilder::InsertPointTy {
9560       CGF.Builder.restoreIP(IP);
9561       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9562                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9563       return CGF.Builder.saveIP();
9564     };
9565 
9566     llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9567     llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9568     llvm::Value *NumThreads =
9569         OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9570     llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9571     llvm::Value *NumIterations =
9572         OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9573     llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9574     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9575         CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9576 
9577     llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9578         BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9579         nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9580 
9581     llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9582         NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9583         DynCGGroupMem, HasNoWait);
9584 
9585     CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9586         CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9587         DeviceID, RTLoc, AllocaIP));
9588   };
9589 
9590   if (RequiresOuterTask)
9591     CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9592   else
9593     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9594 }
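
// Schematically (illustrative only; the exact runtime entry point is chosen
// by the OpenMPIRBuilder), the launch emitted above behaves like:
//   if (__tgt_target_kernel(loc, device_id, num_teams, num_threads,
//                           outlined_fn_id, &kernel_args) != 0)
//     host_fallback(); // EmitTargetCallFallbackCB
// wrapped in an outer task when the directive requires one.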
9595 
9596 static void
9597 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9598                    const OMPExecutableDirective &D,
9599                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9600                    bool RequiresOuterTask, const CapturedStmt &CS,
9601                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9602 
9603   // Notify that the host version must be executed.
9604   auto &&ElseGen =
9605       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9606        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9607         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9608                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9609       };
9610 
9611   if (RequiresOuterTask) {
9612     CodeGenFunction::OMPTargetDataInfo InputInfo;
9613     CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9614   } else {
9615     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9616   }
9617 }
9618 
9619 void CGOpenMPRuntime::emitTargetCall(
9620     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9621     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9622     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9623     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9624                                      const OMPLoopDirective &D)>
9625         SizeEmitter) {
9626   if (!CGF.HaveInsertPoint())
9627     return;
9628 
9629   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9630                                    CGM.getLangOpts().OpenMPOffloadMandatory;
9631 
9632   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9633 
9634   const bool RequiresOuterTask =
9635       D.hasClausesOfKind<OMPDependClause>() ||
9636       D.hasClausesOfKind<OMPNowaitClause>() ||
9637       D.hasClausesOfKind<OMPInReductionClause>() ||
9638       (CGM.getLangOpts().OpenMP >= 51 &&
9639        needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9640        D.hasClausesOfKind<OMPThreadLimitClause>());
9641   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9642   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9643   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9644                                             PrePostActionTy &) {
9645     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9646   };
9647   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9648 
9649   CodeGenFunction::OMPTargetDataInfo InputInfo;
9650   llvm::Value *MapTypesArray = nullptr;
9651   llvm::Value *MapNamesArray = nullptr;
9652 
9653   auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9654                           RequiresOuterTask, &CS, OffloadingMandatory, Device,
9655                           OutlinedFnID, &InputInfo, &MapTypesArray,
9656                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9657                                                        PrePostActionTy &) {
9658     emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9659                                RequiresOuterTask, CS, OffloadingMandatory,
9660                                Device, OutlinedFnID, InputInfo, MapTypesArray,
9661                                MapNamesArray, SizeEmitter, CGF, CGM);
9662   };
9663 
9664   auto &&TargetElseGen =
9665       [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9666        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9667         emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9668                            CS, OffloadingMandatory, CGF);
9669       };
9670 
9671   // If we have a target function ID it means that we need to support
9672   // offloading; otherwise, just execute on the host. We need to execute on the
9673   // host regardless of the conditional in the if clause if, e.g., the user does
9674   // not specify target triples.
9675   if (OutlinedFnID) {
9676     if (IfCond) {
9677       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9678     } else {
9679       RegionCodeGenTy ThenRCG(TargetThenGen);
9680       ThenRCG(CGF);
9681     }
9682   } else {
9683     RegionCodeGenTy ElseRCG(TargetElseGen);
9684     ElseRCG(CGF);
9685   }
9686 }
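
// For example, '#pragma omp target if(cond)' with a registered offload entry
// branches at runtime: the then-side performs the kernel launch sequence,
// the else-side runs the host-outlined fallback. Without an outlined
// function ID (e.g. no offload targets were requested), only the host path
// is emitted.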
9687 
9688 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9689                                                     StringRef ParentName) {
9690   if (!S)
9691     return;
9692 
9693   // Codegen OMP target directives that offload compute to the device.
9694   bool RequiresDeviceCodegen =
9695       isa<OMPExecutableDirective>(S) &&
9696       isOpenMPTargetExecutionDirective(
9697           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9698 
9699   if (RequiresDeviceCodegen) {
9700     const auto &E = *cast<OMPExecutableDirective>(S);
9701 
9702     llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9703         CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9704 
9705     // Is this a target region that should not be emitted as an entry point? If
9706     // so, just signal that we are done with this target region.
9707     if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9708       return;
9709 
9710     switch (E.getDirectiveKind()) {
9711     case OMPD_target:
9712       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9713                                                    cast<OMPTargetDirective>(E));
9714       break;
9715     case OMPD_target_parallel:
9716       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9717           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9718       break;
9719     case OMPD_target_teams:
9720       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9721           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9722       break;
9723     case OMPD_target_teams_distribute:
9724       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9725           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9726       break;
9727     case OMPD_target_teams_distribute_simd:
9728       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9729           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9730       break;
9731     case OMPD_target_parallel_for:
9732       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9733           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9734       break;
9735     case OMPD_target_parallel_for_simd:
9736       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9737           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9738       break;
9739     case OMPD_target_simd:
9740       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9741           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9742       break;
9743     case OMPD_target_teams_distribute_parallel_for:
9744       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9745           CGM, ParentName,
9746           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9747       break;
9748     case OMPD_target_teams_distribute_parallel_for_simd:
9749       CodeGenFunction::
9750           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9751               CGM, ParentName,
9752               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9753       break;
9754     case OMPD_target_teams_loop:
9755       CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9756           CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9757       break;
9758     case OMPD_target_parallel_loop:
9759       CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9760           CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9761       break;
9762     case OMPD_parallel:
9763     case OMPD_for:
9764     case OMPD_parallel_for:
9765     case OMPD_parallel_master:
9766     case OMPD_parallel_sections:
9767     case OMPD_for_simd:
9768     case OMPD_parallel_for_simd:
9769     case OMPD_cancel:
9770     case OMPD_cancellation_point:
9771     case OMPD_ordered:
9772     case OMPD_threadprivate:
9773     case OMPD_allocate:
9774     case OMPD_task:
9775     case OMPD_simd:
9776     case OMPD_tile:
9777     case OMPD_unroll:
9778     case OMPD_sections:
9779     case OMPD_section:
9780     case OMPD_single:
9781     case OMPD_master:
9782     case OMPD_critical:
9783     case OMPD_taskyield:
9784     case OMPD_barrier:
9785     case OMPD_taskwait:
9786     case OMPD_taskgroup:
9787     case OMPD_atomic:
9788     case OMPD_flush:
9789     case OMPD_depobj:
9790     case OMPD_scan:
9791     case OMPD_teams:
9792     case OMPD_target_data:
9793     case OMPD_target_exit_data:
9794     case OMPD_target_enter_data:
9795     case OMPD_distribute:
9796     case OMPD_distribute_simd:
9797     case OMPD_distribute_parallel_for:
9798     case OMPD_distribute_parallel_for_simd:
9799     case OMPD_teams_distribute:
9800     case OMPD_teams_distribute_simd:
9801     case OMPD_teams_distribute_parallel_for:
9802     case OMPD_teams_distribute_parallel_for_simd:
9803     case OMPD_target_update:
9804     case OMPD_declare_simd:
9805     case OMPD_declare_variant:
9806     case OMPD_begin_declare_variant:
9807     case OMPD_end_declare_variant:
9808     case OMPD_declare_target:
9809     case OMPD_end_declare_target:
9810     case OMPD_declare_reduction:
9811     case OMPD_declare_mapper:
9812     case OMPD_taskloop:
9813     case OMPD_taskloop_simd:
9814     case OMPD_master_taskloop:
9815     case OMPD_master_taskloop_simd:
9816     case OMPD_parallel_master_taskloop:
9817     case OMPD_parallel_master_taskloop_simd:
9818     case OMPD_requires:
9819     case OMPD_metadirective:
9820     case OMPD_unknown:
9821     default:
9822       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9823     }
9824     return;
9825   }
9826 
9827   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9828     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9829       return;
9830 
9831     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9832     return;
9833   }
9834 
9835   // If this is a lambda function, look into its body.
9836   if (const auto *L = dyn_cast<LambdaExpr>(S))
9837     S = L->getBody();
9838 
9839   // Keep looking for target regions recursively.
9840   for (const Stmt *II : S->children())
9841     scanForTargetRegionsFunctions(II, ParentName);
9842 }
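
// E.g. a target region nested inside a lambda body,
//   auto l = [&]() {
//   #pragma omp target
//     { /* ... */ }
//   };
// is still found by this scan: the lambda is unwrapped to its body and the
// region is registered under the enclosing ParentName.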
9843 
9844 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9845   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9846       OMPDeclareTargetDeclAttr::getDeviceType(VD);
9847   if (!DevTy)
9848     return false;
9849   // Do not emit device_type(nohost) functions for the host.
9850   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9851     return true;
9852   // Do not emit device_type(host) functions for the device.
9853   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9854     return true;
9855   return false;
9856 }
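
// For example, a function declared with
//   #pragma omp declare target device_type(nohost)
// is assumed not to be emitted when compiling for the host, and
// device_type(host) declarations are skipped when compiling for the device.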
9857 
9858 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9859   // If emitting code for the host, we do not process FD here. Instead we do
9860   // the normal code generation.
9861   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9862     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9863       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9864                                   CGM.getLangOpts().OpenMPIsTargetDevice))
9865         return true;
9866     return false;
9867   }
9868 
9869   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9870   // Try to detect target regions in the function.
9871   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9872     StringRef Name = CGM.getMangledName(GD);
9873     scanForTargetRegionsFunctions(FD->getBody(), Name);
9874     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9875                                 CGM.getLangOpts().OpenMPIsTargetDevice))
9876       return true;
9877   }
9878 
9879   // Do not emit the function if it is not marked as declare target.
9880   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9881          AlreadyEmittedTargetDecls.count(VD) == 0;
9882 }
9883 
9884 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9885   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9886                               CGM.getLangOpts().OpenMPIsTargetDevice))
9887     return true;
9888 
9889   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9890     return false;
9891 
9892   // Check if there are Ctors/Dtors in this declaration and look for target
9893   // regions in it. We use the complete variant to produce the kernel name
9894   // mangling.
9895   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9896   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9897     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9898       StringRef ParentName =
9899           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9900       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9901     }
9902     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9903       StringRef ParentName =
9904           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9905       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9906     }
9907   }
9908 
9909   // Do not emit the variable if it is not marked as declare target.
9910   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9911       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9912           cast<VarDecl>(GD.getDecl()));
9913   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9914       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9915         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9916        HasRequiresUnifiedSharedMemory)) {
9917     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9918     return true;
9919   }
9920   return false;
9921 }
9922 
9923 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9924                                                    llvm::Constant *Addr) {
9925   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9926       !CGM.getLangOpts().OpenMPIsTargetDevice)
9927     return;
9928 
9929   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9930       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9931 
9932   // If this is an 'extern' declaration we defer to the canonical definition and
9933   // do not emit an offloading entry.
9934   if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9935       VD->hasExternalStorage())
9936     return;
9937 
9938   if (!Res) {
9939     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9940       // Register non-target variables being emitted in device code (debug info
9941       // may cause this).
9942       StringRef VarName = CGM.getMangledName(VD);
9943       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9944     }
9945     return;
9946   }
9947 
9948   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9949   auto LinkageForVariable = [&VD, this]() {
9950     return CGM.getLLVMLinkageVarDefinition(VD);
9951   };
9952 
9953   std::vector<llvm::GlobalVariable *> GeneratedRefs;
9954   OMPBuilder.registerTargetGlobalVariable(
9955       convertCaptureClause(VD), convertDeviceClause(VD),
9956       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9957       VD->isExternallyVisible(),
9958       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9959                                   VD->getCanonicalDecl()->getBeginLoc()),
9960       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9961       CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9962       CGM.getTypes().ConvertTypeForMem(
9963           CGM.getContext().getPointerType(VD->getType())),
9964       Addr);
9965 
9966   for (auto *ref : GeneratedRefs)
9967     CGM.addCompilerUsedGlobal(ref);
9968 }
9969 
9970 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9971   if (isa<FunctionDecl>(GD.getDecl()) ||
9972       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9973     return emitTargetFunctions(GD);
9974 
9975   return emitTargetGlobalVariable(GD);
9976 }
9977 
9978 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9979   for (const VarDecl *VD : DeferredGlobalVariables) {
9980     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9981         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9982     if (!Res)
9983       continue;
9984     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9985          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9986         !HasRequiresUnifiedSharedMemory) {
9987       CGM.EmitGlobal(VD);
9988     } else {
9989       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9990               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9991                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9992                HasRequiresUnifiedSharedMemory)) &&
9993              "Expected link clause or to clause with unified memory.");
9994       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9995     }
9996   }
9997 }
9998 
9999 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10000     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10001   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10002          " Expected target-based directive.");
10003 }
10004 
10005 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10006   for (const OMPClause *Clause : D->clauselists()) {
10007     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10008       HasRequiresUnifiedSharedMemory = true;
10009       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10010     } else if (const auto *AC =
10011                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10012       switch (AC->getAtomicDefaultMemOrderKind()) {
10013       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10014         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10015         break;
10016       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10017         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10018         break;
10019       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10020         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10021         break;
10022       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10023         break;
10024       }
10025     }
10026   }
10027 }
10028 
10029 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10030   return RequiresAtomicOrdering;
10031 }
10032 
10033 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10034                                                        LangAS &AS) {
10035   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10036     return false;
10037   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10038   switch (A->getAllocatorType()) {
10039   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10040   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10041   // Not supported, fall back to the default memory space.
10042   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10043   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10044   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10045   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10046   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10047   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10048   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10049     AS = LangAS::Default;
10050     return true;
10051   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10052     llvm_unreachable("Expected predefined allocator for variables with "
10053                      "static storage.");
10054   }
10055   return false;
10056 }
10057 
10058 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10059   return HasRequiresUnifiedSharedMemory;
10060 }
10061 
10062 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10063     CodeGenModule &CGM)
10064     : CGM(CGM) {
10065   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10066     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10067     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10068   }
10069 }
10070 
10071 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10072   if (CGM.getLangOpts().OpenMPIsTargetDevice)
10073     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10074 }
10075 
10076 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10077   if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10078     return true;
10079 
10080   const auto *D = cast<FunctionDecl>(GD.getDecl());
10081   // Do not emit the function if it is marked as declare target, as it was
10082   // already emitted.
10083   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10084     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10085       if (auto *F = dyn_cast_or_null<llvm::Function>(
10086               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10087         return !F->isDeclaration();
10088       return false;
10089     }
10090     return true;
10091   }
10092 
10093   return !AlreadyEmittedTargetDecls.insert(D).second;
10094 }
10095 
10096 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10097   // If we don't have entries or if we are emitting code for the device, we
10098   // don't need to do anything.
10099   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10100       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
10101       (OMPBuilder.OffloadInfoManager.empty() &&
10102        !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10103     return nullptr;
10104 
10105   // Create and register the function that handles the requires directives.
10106   ASTContext &C = CGM.getContext();
10107 
10108   llvm::Function *RequiresRegFn;
10109   {
10110     CodeGenFunction CGF(CGM);
10111     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10112     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10113     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10114     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10115     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10116     // TODO: check for other requires clauses.
10117     // The requires directive takes effect only when a target region is
10118     // present in the compilation unit. Otherwise it is ignored and not
10119     // passed to the runtime. This prevents the runtime from raising an error
10120     // for mismatched requires clauses across compilation units that don't
10121     // contain at least one target region.
10122     assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10123             !OMPBuilder.OffloadInfoManager.empty()) &&
10124            "Target or declare target region expected.");
10125     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10126                             CGM.getModule(), OMPRTL___tgt_register_requires),
10127                         llvm::ConstantInt::get(
10128                             CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
10129     CGF.FinishFunction();
10130   }
10131   return RequiresRegFn;
10132 }
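
// For a TU that contains '#pragma omp requires unified_shared_memory' and at
// least one target region, the function emitted above is roughly:
//   void omp_offloading.requires_reg() {
//     __tgt_register_requires(flags); // flags from OMPBuilder.Config
//   }
// registered to run as a global initializer (sketch; the exact symbol name
// comes from getName({"omp_offloading", "requires_reg"})).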
10133 
10134 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10135                                     const OMPExecutableDirective &D,
10136                                     SourceLocation Loc,
10137                                     llvm::Function *OutlinedFn,
10138                                     ArrayRef<llvm::Value *> CapturedVars) {
10139   if (!CGF.HaveInsertPoint())
10140     return;
10141 
10142   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10143   CodeGenFunction::RunCleanupsScope Scope(CGF);
10144 
10145   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10146   llvm::Value *Args[] = {
10147       RTLoc,
10148       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10149       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10150   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10151   RealArgs.append(std::begin(Args), std::end(Args));
10152   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10153 
10154   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10155       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10156   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10157 }
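
// E.g. a teams region with two captured variables lowers to something like:
//   call void @__kmpc_fork_teams(ptr @loc, i32 2, ptr @outlined.fn,
//                                ptr %a, ptr %b)
// (schematic IR; argument types depend on how each variable is captured).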
10158 
10159 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10160                                          const Expr *NumTeams,
10161                                          const Expr *ThreadLimit,
10162                                          SourceLocation Loc) {
10163   if (!CGF.HaveInsertPoint())
10164     return;
10165 
10166   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10167 
10168   llvm::Value *NumTeamsVal =
10169       NumTeams
10170           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10171                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10172           : CGF.Builder.getInt32(0);
10173 
10174   llvm::Value *ThreadLimitVal =
10175       ThreadLimit
10176           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10177                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10178           : CGF.Builder.getInt32(0);
10179 
10180   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10181   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10182                                      ThreadLimitVal};
10183   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10184                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10185                       PushNumTeamsArgs);
10186 }
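
// E.g. 'num_teams(4) thread_limit(64)' lowers to
//   __kmpc_push_num_teams(&loc, global_tid, 4, 64);
// with 0 passed for whichever clause is absent.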
10187 
10188 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10189                                             const Expr *ThreadLimit,
10190                                             SourceLocation Loc) {
10191   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10192   llvm::Value *ThreadLimitVal =
10193       ThreadLimit
10194           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10195                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10196           : CGF.Builder.getInt32(0);
10197 
10198   // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10199   llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10200                                     ThreadLimitVal};
10201   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10202                           CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10203                       ThreadLimitArgs);
10204 }
10205 
10206 void CGOpenMPRuntime::emitTargetDataCalls(
10207     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10208     const Expr *Device, const RegionCodeGenTy &CodeGen,
10209     CGOpenMPRuntime::TargetDataInfo &Info) {
10210   if (!CGF.HaveInsertPoint())
10211     return;
10212 
10213   // Action used to replace the default codegen action and turn privatization
10214   // off.
10215   PrePostActionTy NoPrivAction;
10216 
10217   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10218 
10219   llvm::Value *IfCondVal = nullptr;
10220   if (IfCond)
10221     IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10222 
10223   // Emit device ID if any.
10224   llvm::Value *DeviceID = nullptr;
10225   if (Device) {
10226     DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10227                                          CGF.Int64Ty, /*isSigned=*/true);
10228   } else {
10229     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10230   }
10231 
10232   // Fill up the arrays with all the mapped variables.
10233   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10234   auto GenMapInfoCB =
10235       [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10236     CGF.Builder.restoreIP(CodeGenIP);
10237     // Get map clause information.
10238     MappableExprsHandler MEHandler(D, CGF);
10239     MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10240 
10241     auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10242       return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10243     };
10244     if (CGM.getCodeGenOpts().getDebugInfo() !=
10245         llvm::codegenoptions::NoDebugInfo) {
10246       CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10247       llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10248                       FillInfoMap);
10249     }
10250 
10251     return CombinedInfo;
10252   };
10253   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10254   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10255     CGF.Builder.restoreIP(CodeGenIP);
10256     switch (BodyGenType) {
10257     case BodyGenTy::Priv:
10258       if (!Info.CaptureDeviceAddrMap.empty())
10259         CodeGen(CGF);
10260       break;
10261     case BodyGenTy::DupNoPriv:
10262       if (!Info.CaptureDeviceAddrMap.empty()) {
10263         CodeGen.setAction(NoPrivAction);
10264         CodeGen(CGF);
10265       }
10266       break;
10267     case BodyGenTy::NoPriv:
10268       if (Info.CaptureDeviceAddrMap.empty()) {
10269         CodeGen.setAction(NoPrivAction);
10270         CodeGen(CGF);
10271       }
10272       break;
10273     }
10274     return InsertPointTy(CGF.Builder.GetInsertBlock(),
10275                          CGF.Builder.GetInsertPoint());
10276   };
10277 
10278   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10279     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10280       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10281     }
10282   };
10283 
10284   auto CustomMapperCB = [&](unsigned int I) {
10285     llvm::Value *MFunc = nullptr;
10286     if (CombinedInfo.Mappers[I]) {
10287       Info.HasMapper = true;
10288       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10289           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10290     }
10291     return MFunc;
10292   };
10293 
10294   // Source location for the ident struct
10295   llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10296 
10297   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10298                          CGF.AllocaInsertPt->getIterator());
10299   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10300                           CGF.Builder.GetInsertPoint());
10301   llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10302   CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10303       OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10304       /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10305 }
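
// Schematically, for '#pragma omp target data map(tofrom: a)' the
// OpenMPIRBuilder brackets the region body with
// __tgt_target_data_begin_mapper(...) / __tgt_target_data_end_mapper(...)
// calls built from GenMapInfoCB, while BodyCB re-emits the body in its
// Priv/DupNoPriv/NoPriv variants depending on whether use_device_ptr/addr
// captures are present.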
10306 
10307 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10308     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10309     const Expr *Device) {
10310   if (!CGF.HaveInsertPoint())
10311     return;
10312 
10313   assert((isa<OMPTargetEnterDataDirective>(D) ||
10314           isa<OMPTargetExitDataDirective>(D) ||
10315           isa<OMPTargetUpdateDirective>(D)) &&
10316          "Expecting either target enter, exit data, or update directives.");
10317 
10318   CodeGenFunction::OMPTargetDataInfo InputInfo;
10319   llvm::Value *MapTypesArray = nullptr;
10320   llvm::Value *MapNamesArray = nullptr;
10321   // Generate the code for the opening of the data environment.
10322   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10323                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10324     // Emit device ID if any.
10325     llvm::Value *DeviceID = nullptr;
10326     if (Device) {
10327       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10328                                            CGF.Int64Ty, /*isSigned=*/true);
10329     } else {
10330       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10331     }
10332 
10333     // Emit the number of elements in the offloading arrays.
10334     llvm::Constant *PointerNum =
10335         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10336 
10337     // Source location for the ident struct
10338     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10339 
10340     llvm::Value *OffloadingArgs[] = {RTLoc,
10341                                      DeviceID,
10342                                      PointerNum,
10343                                      InputInfo.BasePointersArray.getPointer(),
10344                                      InputInfo.PointersArray.getPointer(),
10345                                      InputInfo.SizesArray.getPointer(),
10346                                      MapTypesArray,
10347                                      MapNamesArray,
10348                                      InputInfo.MappersArray.getPointer()};
10349 
10350     // Select the right runtime function call for each standalone
10351     // directive.
10352     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10353     RuntimeFunction RTLFn;
10354     switch (D.getDirectiveKind()) {
10355     case OMPD_target_enter_data:
10356       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10357                         : OMPRTL___tgt_target_data_begin_mapper;
10358       break;
10359     case OMPD_target_exit_data:
10360       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10361                         : OMPRTL___tgt_target_data_end_mapper;
10362       break;
10363     case OMPD_target_update:
10364       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10365                         : OMPRTL___tgt_target_data_update_mapper;
10366       break;
10367     case OMPD_parallel:
10368     case OMPD_for:
10369     case OMPD_parallel_for:
10370     case OMPD_parallel_master:
10371     case OMPD_parallel_sections:
10372     case OMPD_for_simd:
10373     case OMPD_parallel_for_simd:
10374     case OMPD_cancel:
10375     case OMPD_cancellation_point:
10376     case OMPD_ordered:
10377     case OMPD_threadprivate:
10378     case OMPD_allocate:
10379     case OMPD_task:
10380     case OMPD_simd:
10381     case OMPD_tile:
10382     case OMPD_unroll:
10383     case OMPD_sections:
10384     case OMPD_section:
10385     case OMPD_single:
10386     case OMPD_master:
10387     case OMPD_critical:
10388     case OMPD_taskyield:
10389     case OMPD_barrier:
10390     case OMPD_taskwait:
10391     case OMPD_taskgroup:
10392     case OMPD_atomic:
10393     case OMPD_flush:
10394     case OMPD_depobj:
10395     case OMPD_scan:
10396     case OMPD_teams:
10397     case OMPD_target_data:
10398     case OMPD_distribute:
10399     case OMPD_distribute_simd:
10400     case OMPD_distribute_parallel_for:
10401     case OMPD_distribute_parallel_for_simd:
10402     case OMPD_teams_distribute:
10403     case OMPD_teams_distribute_simd:
10404     case OMPD_teams_distribute_parallel_for:
10405     case OMPD_teams_distribute_parallel_for_simd:
10406     case OMPD_declare_simd:
10407     case OMPD_declare_variant:
10408     case OMPD_begin_declare_variant:
10409     case OMPD_end_declare_variant:
10410     case OMPD_declare_target:
10411     case OMPD_end_declare_target:
10412     case OMPD_declare_reduction:
10413     case OMPD_declare_mapper:
10414     case OMPD_taskloop:
10415     case OMPD_taskloop_simd:
10416     case OMPD_master_taskloop:
10417     case OMPD_master_taskloop_simd:
10418     case OMPD_parallel_master_taskloop:
10419     case OMPD_parallel_master_taskloop_simd:
10420     case OMPD_target:
10421     case OMPD_target_simd:
10422     case OMPD_target_teams_distribute:
10423     case OMPD_target_teams_distribute_simd:
10424     case OMPD_target_teams_distribute_parallel_for:
10425     case OMPD_target_teams_distribute_parallel_for_simd:
10426     case OMPD_target_teams:
10427     case OMPD_target_parallel:
10428     case OMPD_target_parallel_for:
10429     case OMPD_target_parallel_for_simd:
10430     case OMPD_requires:
10431     case OMPD_metadirective:
10432     case OMPD_unknown:
10433     default:
10434       llvm_unreachable("Unexpected standalone target data directive.");
10435       break;
10436     }
10437     CGF.EmitRuntimeCall(
10438         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10439         OffloadingArgs);
10440   };
10441 
10442   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10443                           &MapNamesArray](CodeGenFunction &CGF,
10444                                           PrePostActionTy &) {
10445     // Fill up the arrays with all the mapped variables.
10446     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10447 
10448     // Get map clause information.
10449     MappableExprsHandler MEHandler(D, CGF);
10450     MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10451 
10452     CGOpenMPRuntime::TargetDataInfo Info;
10453     // Fill up the arrays and create the arguments.
10454     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10455                          /*IsNonContiguous=*/true);
10456     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10457                              D.hasClausesOfKind<OMPNowaitClause>();
10458     bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10459                      llvm::codegenoptions::NoDebugInfo;
10460     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10461                                             EmitDebug,
10462                                             /*ForEndCall=*/false);
10463     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10464     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10465                                           CGF.VoidPtrTy, CGM.getPointerAlign());
10466     InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10467                                       CGM.getPointerAlign());
10468     InputInfo.SizesArray =
10469         Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10470     InputInfo.MappersArray =
10471         Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10472     MapTypesArray = Info.RTArgs.MapTypesArray;
10473     MapNamesArray = Info.RTArgs.MapNamesArray;
10474     if (RequiresOuterTask)
10475       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10476     else
10477       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10478   };
10479 
10480   if (IfCond) {
10481     emitIfClause(CGF, IfCond, TargetThenGen,
10482                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10483   } else {
10484     RegionCodeGenTy ThenRCG(TargetThenGen);
10485     ThenRCG(CGF);
10486   }
10487 }
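
// For example, '#pragma omp target update to(x) nowait' selects
// OMPRTL___tgt_target_data_update_nowait_mapper above and, because nowait
// implies RequiresOuterTask, the call is wrapped in a task via
// EmitOMPTargetTaskBasedDirective.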
10488 
10489 namespace {
10490 /// Kind of parameter in a function with 'declare simd' directive.
10491 enum ParamKindTy {
10492   Linear,
10493   LinearRef,
10494   LinearUVal,
10495   LinearVal,
10496   Uniform,
10497   Vector,
10498 };
10499 /// Attribute set of the parameter.
10500 struct ParamAttrTy {
10501   ParamKindTy Kind = Vector;
10502   llvm::APSInt StrideOrArg;
10503   llvm::APSInt Alignment;
10504   bool HasVarStride = false;
10505 };
10506 } // namespace
10507 
10508 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10509                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10510   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10511   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10512   // of that clause. The VLEN value must be power of 2.
10513   // In other case the notion of the function`s "characteristic data type" (CDT)
10514   // is used to compute the vector length.
10515   // CDT is defined in the following order:
10516   //   a) For non-void function, the CDT is the return type.
10517   //   b) If the function has any non-uniform, non-linear parameters, then the
10518   //   CDT is the type of the first such parameter.
10519   //   c) If the CDT determined by a) or b) above is a struct, union, or class
10520   //   type which is passed by value (except for the type that maps to the
10521   //   built-in complex data type), the characteristic data type is int.
10522   //   d) If none of the above three cases is applicable, the CDT is int.
10523   // The VLEN is then determined based on the CDT and the size of vector
10524   // register of that ISA for which current vector version is generated. The
10525   // VLEN is computed using the formula below:
10526   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10527   // where the vector register size is specified in section 3.2.1 "Registers and
10528   // the Stack Frame" of the original AMD64 ABI document.
10529   QualType RetType = FD->getReturnType();
10530   if (RetType.isNull())
10531     return 0;
10532   ASTContext &C = FD->getASTContext();
10533   QualType CDT;
10534   if (!RetType.isNull() && !RetType->isVoidType()) {
10535     CDT = RetType;
10536   } else {
10537     unsigned Offset = 0;
10538     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10539       if (ParamAttrs[Offset].Kind == Vector)
10540         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10541       ++Offset;
10542     }
10543     if (CDT.isNull()) {
10544       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10545         if (ParamAttrs[I + Offset].Kind == Vector) {
10546           CDT = FD->getParamDecl(I)->getType();
10547           break;
10548         }
10549       }
10550     }
10551   }
10552   if (CDT.isNull())
10553     CDT = C.IntTy;
10554   CDT = CDT->getCanonicalTypeUnqualified();
10555   if (CDT->isRecordType() || CDT->isUnionType())
10556     CDT = C.IntTy;
10557   return C.getTypeSize(CDT);
10558 }
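
// Worked example: for 'double foo(double, double)' without a simdlen clause
// the CDT is double (64 bits), so a 256-bit (AVX2) variant would use
// VLEN = 256 / 64 = 4.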
10559 
10560 /// Mangle the parameter part of the vector function name according to the
10561 /// parameters' OpenMP classification. The mangling function is defined in
10562 /// section 4.5 of the AAVFABI(2021Q1).
10563 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10564   SmallString<256> Buffer;
10565   llvm::raw_svector_ostream Out(Buffer);
10566   for (const auto &ParamAttr : ParamAttrs) {
10567     switch (ParamAttr.Kind) {
10568     case Linear:
10569       Out << 'l';
10570       break;
10571     case LinearRef:
10572       Out << 'R';
10573       break;
10574     case LinearUVal:
10575       Out << 'U';
10576       break;
10577     case LinearVal:
10578       Out << 'L';
10579       break;
10580     case Uniform:
10581       Out << 'u';
10582       break;
10583     case Vector:
10584       Out << 'v';
10585       break;
10586     }
10587     if (ParamAttr.HasVarStride)
10588       Out << "s" << ParamAttr.StrideOrArg;
10589     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10590              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10591       // Don't print the step value if it is not present or if it is
10592       // equal to 1.
10593       if (ParamAttr.StrideOrArg < 0)
10594         Out << 'n' << -ParamAttr.StrideOrArg;
10595       else if (ParamAttr.StrideOrArg != 1)
10596         Out << ParamAttr.StrideOrArg;
10597     }
10598 
10599     if (!!ParamAttr.Alignment)
10600       Out << 'a' << ParamAttr.Alignment;
10601   }
10602 
10603   return std::string(Out.str());
10604 }
10605 
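// For illustration (hypothetical parameters, not tied to any particular
// function): parameters classified as (Uniform, Vector, Linear with step 4)
// mangle to "uvl4"; a Linear parameter with the default step of 1
// contributes just "l", and a Vector parameter with alignment 16
// contributes "va16".
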
10606 static void
10607 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10608                            const llvm::APSInt &VLENVal,
10609                            ArrayRef<ParamAttrTy> ParamAttrs,
10610                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10611   struct ISADataTy {
10612     char ISA;
10613     unsigned VecRegSize;
10614   };
10615   ISADataTy ISAData[] = {
10616       {'b', 128}, // SSE
10617       {'c', 256}, // AVX
10618       {'d', 256}, // AVX2
10619       {'e', 512}, // AVX512
10620   };
10629   llvm::SmallVector<char, 2> Masked;
10630   switch (State) {
10631   case OMPDeclareSimdDeclAttr::BS_Undefined:
10632     Masked.push_back('N');
10633     Masked.push_back('M');
10634     break;
10635   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10636     Masked.push_back('N');
10637     break;
10638   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10639     Masked.push_back('M');
10640     break;
10641   }
10642   for (char Mask : Masked) {
10643     for (const ISADataTy &Data : ISAData) {
10644       SmallString<256> Buffer;
10645       llvm::raw_svector_ostream Out(Buffer);
10646       Out << "_ZGV" << Data.ISA << Mask;
10647       if (!VLENVal) {
10648         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10649         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10650         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10651       } else {
10652         Out << VLENVal;
10653       }
10654       Out << mangleVectorParameters(ParamAttrs);
10655       Out << '_' << Fn->getName();
10656       Fn->addFnAttr(Out.str());
10657     }
10658   }
10659 }
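
// For example (hypothetical function name 'add'): '#pragma omp declare simd
// simdlen(4) notinbranch' on a function taking two Vector parameters adds
// the attributes _ZGVbN4vv_add, _ZGVcN4vv_add, _ZGVdN4vv_add and
// _ZGVeN4vv_add, one per entry in ISAData, each with the unmasked 'N'
// mask character.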
10660 
10661 // These are the functions needed to mangle the names of the vector
10662 // functions generated by the compiler, according to the rules
10663 // defined in the "Vector Function ABI specification for AArch64",
10664 // available at
10665 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10666 
10667 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10668 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10669   QT = QT.getCanonicalType();
10670 
10671   if (QT->isVoidType())
10672     return false;
10673 
10674   if (Kind == ParamKindTy::Uniform)
10675     return false;
10676 
10677   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10678     return false;
10679 
10680   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10681       !QT->isReferenceType())
10682     return false;
10683 
10684   return true;
10685 }
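
// E.g. uniform and 'linear(uval)'/'linear(ref)' parameters never map to
// vector; plain 'linear' and 'linear(val)' parameters map to vector only
// when the parameter is a reference.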
10686 
10687 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10688 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10689   QT = QT.getCanonicalType();
10690   unsigned Size = C.getTypeSize(QT);
10691 
10692   // Only scalar and complex types at most 16 bytes wide set PBV to true.
10693   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10694     return false;
10695 
10696   if (QT->isFloatingType())
10697     return true;
10698 
10699   if (QT->isIntegerType())
10700     return true;
10701 
10702   if (QT->isPointerType())
10703     return true;
10704 
10705   // TODO: Add support for complex types (section 3.1.2, item 2).
10706 
10707   return false;
10708 }
10709 
10710 /// Computes the lane size (LS) of a return type or of an input parameter,
10711 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10712 /// TODO: Add support for references, section 3.2.1, item 1.
10713 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10714   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10715     QualType PTy = QT.getCanonicalType()->getPointeeType();
10716     if (getAArch64PBV(PTy, C))
10717       return C.getTypeSize(PTy);
10718   }
10719   if (getAArch64PBV(QT, C))
10720     return C.getTypeSize(QT);
10721 
10722   return C.getTypeSize(C.getUIntPtrType());
10723 }
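
// E.g. (illustrative): a uniform 'double *' parameter has a PBV pointee,
// so LS = 64; a PBV 'float' parameter has LS = 32; anything else falls
// back to the size of a pointer.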
10724 
10725 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10726 // signature of the scalar function, as defined in 3.2.2 of the
10727 // AAVFABI.
10728 static std::tuple<unsigned, unsigned, bool>
10729 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10730   QualType RetType = FD->getReturnType().getCanonicalType();
10731 
10732   ASTContext &C = FD->getASTContext();
10733 
10734   bool OutputBecomesInput = false;
10735 
10736   llvm::SmallVector<unsigned, 8> Sizes;
10737   if (!RetType->isVoidType()) {
10738     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10739     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10740       OutputBecomesInput = true;
10741   }
10742   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10743     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10744     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10745   }
10746 
10747   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10748   // The LS of a function parameter / return value can only be a power
10749   // of 2, starting from 8 bits, up to 128.
10750   assert(llvm::all_of(Sizes,
10751                       [](unsigned Size) {
10752                         return Size == 8 || Size == 16 || Size == 32 ||
10753                                Size == 64 || Size == 128;
10754                       }) &&
10755          "Invalid size");
10756 
10757   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10758                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10759                          OutputBecomesInput);
10760 }
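
// E.g. (illustrative): for 'double foo(float, double)' with both parameters
// classified as Vector, the lane sizes are {64, 32, 64}, so NDS = 32 and
// WDS = 64.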
10761 
10762 // Function used to add the vector-function attribute. The `VLEN`
10763 // parameter is templated to allow the use of "x" when targeting
10764 // scalable (SVE) vector functions.
10765 template <typename T>
10766 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10767                                  char ISA, StringRef ParSeq,
10768                                  StringRef MangledName, bool OutputBecomesInput,
10769                                  llvm::Function *Fn) {
10770   SmallString<256> Buffer;
10771   llvm::raw_svector_ostream Out(Buffer);
10772   Out << Prefix << ISA << LMask << VLEN;
10773   if (OutputBecomesInput)
10774     Out << "v";
10775   Out << ParSeq << "_" << MangledName;
10776   Fn->addFnAttr(Out.str());
10777 }
10778 
10779 // Helper function to generate the Advanced SIMD names depending on
10780 // the value of the NDS when simdlen is not present.
10781 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10782                                       StringRef Prefix, char ISA,
10783                                       StringRef ParSeq, StringRef MangledName,
10784                                       bool OutputBecomesInput,
10785                                       llvm::Function *Fn) {
10786   switch (NDS) {
10787   case 8:
10788     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10789                          OutputBecomesInput, Fn);
10790     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10791                          OutputBecomesInput, Fn);
10792     break;
10793   case 16:
10794     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10795                          OutputBecomesInput, Fn);
10796     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10797                          OutputBecomesInput, Fn);
10798     break;
10799   case 32:
10800     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10801                          OutputBecomesInput, Fn);
10802     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10803                          OutputBecomesInput, Fn);
10804     break;
10805   case 64:
10806   case 128:
10807     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10808                          OutputBecomesInput, Fn);
10809     break;
10810   default:
10811     llvm_unreachable("Scalar type is too wide.");
10812   }
10813 }
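
// E.g. with NDS == 32, the switch above emits the 2-lane (64-bit) and
// 4-lane (128-bit) Advanced SIMD variants.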
10814 
10815 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10816 static void emitAArch64DeclareSimdFunction(
10817     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10818     ArrayRef<ParamAttrTy> ParamAttrs,
10819     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10820     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10821 
10822   // Get basic data for building the vector signature.
10823   const auto Data = getNDSWDS(FD, ParamAttrs);
10824   const unsigned NDS = std::get<0>(Data);
10825   const unsigned WDS = std::get<1>(Data);
10826   const bool OutputBecomesInput = std::get<2>(Data);
10827 
10828   // Check the values provided via `simdlen` by the user.
10829   // 1. A `simdlen(1)` doesn't produce vector signatures.
10830   if (UserVLEN == 1) {
10831     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10832         DiagnosticsEngine::Warning,
10833         "The clause simdlen(1) has no effect when targeting aarch64.");
10834     CGM.getDiags().Report(SLoc, DiagID);
10835     return;
10836   }
10837 
10838   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10839   // Advanced SIMD output.
10840   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10841     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10842         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10843                                     "power of 2 when targeting Advanced SIMD.");
10844     CGM.getDiags().Report(SLoc, DiagID);
10845     return;
10846   }
10847 
10848   // 3. Section 3.4.1: the SVE fixed length must obey the architectural
10849   // limits.
10850   if (ISA == 's' && UserVLEN != 0) {
10851     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10852       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10853           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10854                                       "lanes in the architectural constraints "
10855                                       "for SVE (min is 128-bit, max is "
10856                                       "2048-bit, by steps of 128-bit)");
10857       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10858       return;
10859     }
10860   }
10861 
10862   // Sort out parameter sequence.
10863   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10864   StringRef Prefix = "_ZGV";
10865   // Generate simdlen from user input (if any).
10866   if (UserVLEN) {
10867     if (ISA == 's') {
10868       // SVE generates only a masked function.
10869       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10870                            OutputBecomesInput, Fn);
10871     } else {
10872       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10873       // Advanced SIMD generates one or two functions, depending on
10874       // the `[not]inbranch` clause.
10875       switch (State) {
10876       case OMPDeclareSimdDeclAttr::BS_Undefined:
10877         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10878                              OutputBecomesInput, Fn);
10879         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10880                              OutputBecomesInput, Fn);
10881         break;
10882       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10883         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10884                              OutputBecomesInput, Fn);
10885         break;
10886       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10887         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10888                              OutputBecomesInput, Fn);
10889         break;
10890       }
10891     }
10892   } else {
10893     // If no user simdlen is provided, follow the AAVFABI rules for
10894     // generating the vector length.
10895     if (ISA == 's') {
10896       // SVE, section 3.4.1, item 1.
10897       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10898                            OutputBecomesInput, Fn);
10899     } else {
10900       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10901       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10902       // two vector names depending on the use of the clause
10903       // `[not]inbranch`.
10904       switch (State) {
10905       case OMPDeclareSimdDeclAttr::BS_Undefined:
10906         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10907                                   OutputBecomesInput, Fn);
10908         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10909                                   OutputBecomesInput, Fn);
10910         break;
10911       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10912         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10913                                   OutputBecomesInput, Fn);
10914         break;
10915       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10916         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10917                                   OutputBecomesInput, Fn);
10918         break;
10919       }
10920     }
10921   }
10922 }
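
// E.g. (hypothetical function 'foo' with a single Vector parameter, no
// 'simdlen' and no branch-state clause): for SVE this adds the single
// masked attribute "_ZGVsMxv_foo"; for Advanced SIMD with NDS == 64 it
// adds "_ZGVnN2v_foo" and "_ZGVnM2v_foo".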
10923 
10924 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10925                                               llvm::Function *Fn) {
10926   ASTContext &C = CGM.getContext();
10927   FD = FD->getMostRecentDecl();
10928   while (FD) {
10929     // Map params to their positions in function decl.
10930     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10931     if (isa<CXXMethodDecl>(FD))
10932       ParamPositions.try_emplace(FD, 0);
10933     unsigned ParamPos = ParamPositions.size();
10934     for (const ParmVarDecl *P : FD->parameters()) {
10935       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10936       ++ParamPos;
10937     }
10938     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10939       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10940       // Mark uniform parameters.
10941       for (const Expr *E : Attr->uniforms()) {
10942         E = E->IgnoreParenImpCasts();
10943         unsigned Pos;
10944         if (isa<CXXThisExpr>(E)) {
10945           Pos = ParamPositions[FD];
10946         } else {
10947           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10948                                 ->getCanonicalDecl();
10949           auto It = ParamPositions.find(PVD);
10950           assert(It != ParamPositions.end() && "Function parameter not found");
10951           Pos = It->second;
10952         }
10953         ParamAttrs[Pos].Kind = Uniform;
10954       }
10955       // Get alignment info.
10956       auto *NI = Attr->alignments_begin();
10957       for (const Expr *E : Attr->aligneds()) {
10958         E = E->IgnoreParenImpCasts();
10959         unsigned Pos;
10960         QualType ParmTy;
10961         if (isa<CXXThisExpr>(E)) {
10962           Pos = ParamPositions[FD];
10963           ParmTy = E->getType();
10964         } else {
10965           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10966                                 ->getCanonicalDecl();
10967           auto It = ParamPositions.find(PVD);
10968           assert(It != ParamPositions.end() && "Function parameter not found");
10969           Pos = It->second;
10970           ParmTy = PVD->getType();
10971         }
10972         ParamAttrs[Pos].Alignment =
10973             (*NI)
10974                 ? (*NI)->EvaluateKnownConstInt(C)
10975                 : llvm::APSInt::getUnsigned(
10976                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10977                           .getQuantity());
10978         ++NI;
10979       }
10980       // Mark linear parameters.
10981       auto *SI = Attr->steps_begin();
10982       auto *MI = Attr->modifiers_begin();
10983       for (const Expr *E : Attr->linears()) {
10984         E = E->IgnoreParenImpCasts();
10985         unsigned Pos;
10986         bool IsReferenceType = false;
10987         // Rescaling factor needed to compute the linear parameter
10988         // value in the mangled name.
10989         unsigned PtrRescalingFactor = 1;
10990         if (isa<CXXThisExpr>(E)) {
10991           Pos = ParamPositions[FD];
10992           auto *P = cast<PointerType>(E->getType());
10993           PtrRescalingFactor = CGM.getContext()
10994                                    .getTypeSizeInChars(P->getPointeeType())
10995                                    .getQuantity();
10996         } else {
10997           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10998                                 ->getCanonicalDecl();
10999           auto It = ParamPositions.find(PVD);
11000           assert(It != ParamPositions.end() && "Function parameter not found");
11001           Pos = It->second;
11002           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11003             PtrRescalingFactor = CGM.getContext()
11004                                      .getTypeSizeInChars(P->getPointeeType())
11005                                      .getQuantity();
11006           else if (PVD->getType()->isReferenceType()) {
11007             IsReferenceType = true;
11008             PtrRescalingFactor =
11009                 CGM.getContext()
11010                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11011                     .getQuantity();
11012           }
11013         }
11014         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11015         if (*MI == OMPC_LINEAR_ref)
11016           ParamAttr.Kind = LinearRef;
11017         else if (*MI == OMPC_LINEAR_uval)
11018           ParamAttr.Kind = LinearUVal;
11019         else if (IsReferenceType)
11020           ParamAttr.Kind = LinearVal;
11021         else
11022           ParamAttr.Kind = Linear;
11023         // Assume a stride of 1 for `linear` without modifiers.
11024         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11025         if (*SI) {
11026           Expr::EvalResult Result;
11027           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11028             if (const auto *DRE =
11029                     dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11030               if (const auto *StridePVD =
11031                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11032                 ParamAttr.HasVarStride = true;
11033                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11034                 assert(It != ParamPositions.end() &&
11035                        "Function parameter not found");
11036                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11037               }
11038             }
11039           } else {
11040             ParamAttr.StrideOrArg = Result.Val.getInt();
11041           }
11042         }
11043         // If we are using a linear clause on a pointer, we need to
11044         // rescale the value of linear_step with the byte size of the
11045         // pointee type.
11046         if (!ParamAttr.HasVarStride &&
11047             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11048           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11049         ++SI;
11050         ++MI;
11051       }
11052       llvm::APSInt VLENVal;
11053       SourceLocation ExprLoc;
11054       const Expr *VLENExpr = Attr->getSimdlen();
11055       if (VLENExpr) {
11056         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11057         ExprLoc = VLENExpr->getExprLoc();
11058       }
11059       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11060       if (CGM.getTriple().isX86()) {
11061         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11062       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11063         unsigned VLEN = VLENVal.getExtValue();
11064         StringRef MangledName = Fn->getName();
11065         if (CGM.getTarget().hasFeature("sve"))
11066           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11067                                          MangledName, 's', 128, Fn, ExprLoc);
11068         else if (CGM.getTarget().hasFeature("neon"))
11069           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11070                                          MangledName, 'n', 128, Fn, ExprLoc);
11071       }
11072     }
11073     FD = FD->getPreviousDecl();
11074   }
11075 }
11076 
11077 namespace {
11078 /// Cleanup action for doacross support.
11079 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11080 public:
11081   static const int DoacrossFinArgs = 2;
11082 
11083 private:
11084   llvm::FunctionCallee RTLFn;
11085   llvm::Value *Args[DoacrossFinArgs];
11086 
11087 public:
11088   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11089                     ArrayRef<llvm::Value *> CallArgs)
11090       : RTLFn(RTLFn) {
11091     assert(CallArgs.size() == DoacrossFinArgs);
11092     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11093   }
11094   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11095     if (!CGF.HaveInsertPoint())
11096       return;
11097     CGF.EmitRuntimeCall(RTLFn, Args);
11098   }
11099 };
11100 } // namespace
11101 
11102 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11103                                        const OMPLoopDirective &D,
11104                                        ArrayRef<Expr *> NumIterations) {
11105   if (!CGF.HaveInsertPoint())
11106     return;
11107 
11108   ASTContext &C = CGM.getContext();
11109   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11110   RecordDecl *RD;
11111   if (KmpDimTy.isNull()) {
11112     // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
11113     //  kmp_int64 lo; // lower
11114     //  kmp_int64 up; // upper
11115     //  kmp_int64 st; // stride
11116     // };
11117     RD = C.buildImplicitRecord("kmp_dim");
11118     RD->startDefinition();
11119     addFieldToRecordDecl(C, RD, Int64Ty);
11120     addFieldToRecordDecl(C, RD, Int64Ty);
11121     addFieldToRecordDecl(C, RD, Int64Ty);
11122     RD->completeDefinition();
11123     KmpDimTy = C.getRecordType(RD);
11124   } else {
11125     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11126   }
11127   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11128   QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11129                                             ArraySizeModifier::Normal, 0);
11130 
11131   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11132   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11133   enum { LowerFD = 0, UpperFD, StrideFD };
11134   // Fill dims with data.
11135   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11136     LValue DimsLVal = CGF.MakeAddrLValue(
11137         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11138     // dims.upper = num_iterations;
11139     LValue UpperLVal = CGF.EmitLValueForField(
11140         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11141     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11142         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11143         Int64Ty, NumIterations[I]->getExprLoc());
11144     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11145     // dims.stride = 1;
11146     LValue StrideLVal = CGF.EmitLValueForField(
11147         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11148     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11149                           StrideLVal);
11150   }
11151 
11152   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11153   // kmp_int32 num_dims, struct kmp_dim * dims);
11154   llvm::Value *Args[] = {
11155       emitUpdateLocation(CGF, D.getBeginLoc()),
11156       getThreadID(CGF, D.getBeginLoc()),
11157       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11158       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11159           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11160           CGM.VoidPtrTy)};
11161 
11162   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11163       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11164   CGF.EmitRuntimeCall(RTLFn, Args);
11165   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11166       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11167   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11168       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11169   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11170                                              llvm::ArrayRef(FiniArgs));
11171 }
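
// E.g. (illustrative): for '#pragma omp for ordered(2)' this emits a
// two-element 'dims' array whose 'up' fields hold the loop trip counts and
// whose 'st' fields hold 1, followed by a call
// __kmpc_doacross_init(loc, gtid, /*num_dims=*/2, dims) and a matching
// __kmpc_doacross_fini cleanup.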
11172 
11173 template <typename T>
11174 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11175                                 const T *C, llvm::Value *ULoc,
11176                                 llvm::Value *ThreadID) {
11177   QualType Int64Ty =
11178       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11179   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11180   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11181       Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11182   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11183   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11184     const Expr *CounterVal = C->getLoopData(I);
11185     assert(CounterVal);
11186     llvm::Value *CntVal = CGF.EmitScalarConversion(
11187         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11188         CounterVal->getExprLoc());
11189     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11190                           /*Volatile=*/false, Int64Ty);
11191   }
11192   llvm::Value *Args[] = {
11193       ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11194   llvm::FunctionCallee RTLFn;
11195   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11196   OMPDoacrossKind<T> ODK;
11197   if (ODK.isSource(C)) {
11198     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11199                                                   OMPRTL___kmpc_doacross_post);
11200   } else {
11201     assert(ODK.isSink(C) && "Expect sink modifier.");
11202     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11203                                                   OMPRTL___kmpc_doacross_wait);
11204   }
11205   CGF.EmitRuntimeCall(RTLFn, Args);
11206 }
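
// E.g. '#pragma omp ordered depend(source)' (or 'doacross(source:)') lowers
// to __kmpc_doacross_post with the current iteration vector, while
// 'depend(sink : i-1)' lowers to __kmpc_doacross_wait.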
11207 
11208 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11209                                           const OMPDependClause *C) {
11210   return EmitDoacrossOrdered<OMPDependClause>(
11211       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11212       getThreadID(CGF, C->getBeginLoc()));
11213 }
11214 
11215 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11216                                           const OMPDoacrossClause *C) {
11217   return EmitDoacrossOrdered<OMPDoacrossClause>(
11218       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11219       getThreadID(CGF, C->getBeginLoc()));
11220 }
11221 
11222 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11223                                llvm::FunctionCallee Callee,
11224                                ArrayRef<llvm::Value *> Args) const {
11225   assert(Loc.isValid() && "Outlined function call location must be valid.");
11226   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11227 
11228   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11229     if (Fn->doesNotThrow()) {
11230       CGF.EmitNounwindRuntimeCall(Fn, Args);
11231       return;
11232     }
11233   }
11234   CGF.EmitRuntimeCall(Callee, Args);
11235 }
11236 
11237 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11238     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11239     ArrayRef<llvm::Value *> Args) const {
11240   emitCall(CGF, Loc, OutlinedFn, Args);
11241 }
11242 
11243 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11244   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11245     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11246       HasEmittedDeclareTargetRegion = true;
11247 }
11248 
11249 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11250                                              const VarDecl *NativeParam,
11251                                              const VarDecl *TargetParam) const {
11252   return CGF.GetAddrOfLocalVar(NativeParam);
11253 }
11254 
11255 /// Return allocator value from expression, or return a null allocator (default
11256 /// when no allocator specified).
11257 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11258                                     const Expr *Allocator) {
11259   llvm::Value *AllocVal;
11260   if (Allocator) {
11261     AllocVal = CGF.EmitScalarExpr(Allocator);
11262     // According to the standard, the original allocator type is an enum
11263     // (integer). Convert it to pointer type, if required.
11264     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11265                                         CGF.getContext().VoidPtrTy,
11266                                         Allocator->getExprLoc());
11267   } else {
11268     // If no allocator specified, it defaults to the null allocator.
11269     AllocVal = llvm::Constant::getNullValue(
11270         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11271   }
11272   return AllocVal;
11273 }
11274 
11275 /// Return the alignment from an allocate directive if present.
11276 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11277   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11278 
11279   if (!AllocateAlignment)
11280     return nullptr;
11281 
11282   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11283 }
11284 
11285 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11286                                                    const VarDecl *VD) {
11287   if (!VD)
11288     return Address::invalid();
11289   Address UntiedAddr = Address::invalid();
11290   Address UntiedRealAddr = Address::invalid();
11291   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11292   if (It != FunctionToUntiedTaskStackMap.end()) {
11293     const UntiedLocalVarsAddressesMap &UntiedData =
11294         UntiedLocalVarsStack[It->second];
11295     auto I = UntiedData.find(VD);
11296     if (I != UntiedData.end()) {
11297       UntiedAddr = I->second.first;
11298       UntiedRealAddr = I->second.second;
11299     }
11300   }
11301   const VarDecl *CVD = VD->getCanonicalDecl();
11302   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11303     // Use the default allocation.
11304     if (!isAllocatableDecl(VD))
11305       return UntiedAddr;
11306     llvm::Value *Size;
11307     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11308     if (CVD->getType()->isVariablyModifiedType()) {
11309       Size = CGF.getTypeSize(CVD->getType());
11310       // Align the size: ((size + align - 1) / align) * align
11311       Size = CGF.Builder.CreateNUWAdd(
11312           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11313       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11314       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11315     } else {
11316       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11317       Size = CGM.getSize(Sz.alignTo(Align));
11318     }
11319     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11320     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11321     const Expr *Allocator = AA->getAllocator();
11322     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11323     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11324     SmallVector<llvm::Value *, 4> Args;
11325     Args.push_back(ThreadID);
11326     if (Alignment)
11327       Args.push_back(Alignment);
11328     Args.push_back(Size);
11329     Args.push_back(AllocVal);
11330     llvm::omp::RuntimeFunction FnID =
11331         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11332     llvm::Value *Addr = CGF.EmitRuntimeCall(
11333         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11334         getName({CVD->getName(), ".void.addr"}));
11335     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11336         CGM.getModule(), OMPRTL___kmpc_free);
11337     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11338     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11339         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11340     if (UntiedAddr.isValid())
11341       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11342 
11343     // Cleanup action for allocate support.
11344     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11345       llvm::FunctionCallee RTLFn;
11346       SourceLocation::UIntTy LocEncoding;
11347       Address Addr;
11348       const Expr *AllocExpr;
11349 
11350     public:
11351       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11352                            SourceLocation::UIntTy LocEncoding, Address Addr,
11353                            const Expr *AllocExpr)
11354           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11355             AllocExpr(AllocExpr) {}
11356       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11357         if (!CGF.HaveInsertPoint())
11358           return;
11359         llvm::Value *Args[3];
11360         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11361             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11362         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11363             Addr.getPointer(), CGF.VoidPtrTy);
11364         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11365         Args[2] = AllocVal;
11366         CGF.EmitRuntimeCall(RTLFn, Args);
11367       }
11368     };
11369     Address VDAddr =
11370         UntiedRealAddr.isValid()
11371             ? UntiedRealAddr
11372             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11373     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11374         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11375         VDAddr, Allocator);
11376     if (UntiedRealAddr.isValid())
11377       if (auto *Region =
11378               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11379         Region->emitUntiedSwitch(CGF);
11380     return VDAddr;
11381   }
11382   return UntiedAddr;
11383 }
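
// E.g. the VLA size rounding above: with size = 10 and align = 8,
// ((10 + 8 - 1) / 8) * 8 == 16 bytes are requested from __kmpc_alloc (or
// __kmpc_aligned_alloc when an alignment is given), and the matching
// __kmpc_free is pushed as a cleanup.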
11384 
11385 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11386                                              const VarDecl *VD) const {
11387   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11388   if (It == FunctionToUntiedTaskStackMap.end())
11389     return false;
11390   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11391 }
11392 
11393 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11394     CodeGenModule &CGM, const OMPLoopDirective &S)
11395     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11396   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11397   if (!NeedToPush)
11398     return;
11399   NontemporalDeclsSet &DS =
11400       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11401   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11402     for (const Stmt *Ref : C->private_refs()) {
11403       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11404       const ValueDecl *VD;
11405       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11406         VD = DRE->getDecl();
11407       } else {
11408         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11409         assert((ME->isImplicitCXXThis() ||
11410                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11411                "Expected member of current class.");
11412         VD = ME->getMemberDecl();
11413       }
11414       DS.insert(VD);
11415     }
11416   }
11417 }
11418 
11419 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11420   if (!NeedToPush)
11421     return;
11422   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11423 }
11424 
11425 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11426     CodeGenFunction &CGF,
11427     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11428                           std::pair<Address, Address>> &LocalVars)
11429     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11430   if (!NeedToPush)
11431     return;
11432   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11433       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11434   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11435 }
11436 
11437 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11438   if (!NeedToPush)
11439     return;
11440   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11441 }
11442 
11443 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11444   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11445 
11446   return llvm::any_of(
11447       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11448       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11449 }
11450 
11451 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11452     const OMPExecutableDirective &S,
11453     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11454     const {
11455   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11456   // Vars in target/task regions must be excluded completely.
11457   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11458       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11459     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11460     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11461     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11462     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11463       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11464         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11465     }
11466   }
11467   // Exclude vars in private clauses.
11468   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11469     for (const Expr *Ref : C->varlists()) {
11470       if (!Ref->getType()->isScalarType())
11471         continue;
11472       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11473       if (!DRE)
11474         continue;
11475       NeedToCheckForLPCs.insert(DRE->getDecl());
11476     }
11477   }
11478   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11479     for (const Expr *Ref : C->varlists()) {
11480       if (!Ref->getType()->isScalarType())
11481         continue;
11482       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11483       if (!DRE)
11484         continue;
11485       NeedToCheckForLPCs.insert(DRE->getDecl());
11486     }
11487   }
11488   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11489     for (const Expr *Ref : C->varlists()) {
11490       if (!Ref->getType()->isScalarType())
11491         continue;
11492       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11493       if (!DRE)
11494         continue;
11495       NeedToCheckForLPCs.insert(DRE->getDecl());
11496     }
11497   }
11498   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11499     for (const Expr *Ref : C->varlists()) {
11500       if (!Ref->getType()->isScalarType())
11501         continue;
11502       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11503       if (!DRE)
11504         continue;
11505       NeedToCheckForLPCs.insert(DRE->getDecl());
11506     }
11507   }
11508   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11509     for (const Expr *Ref : C->varlists()) {
11510       if (!Ref->getType()->isScalarType())
11511         continue;
11512       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11513       if (!DRE)
11514         continue;
11515       NeedToCheckForLPCs.insert(DRE->getDecl());
11516     }
11517   }
11518   for (const Decl *VD : NeedToCheckForLPCs) {
11519     for (const LastprivateConditionalData &Data :
11520          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11521       if (Data.DeclToUniqueName.count(VD) > 0) {
11522         if (!Data.Disabled)
11523           NeedToAddForLPCsAsDisabled.insert(VD);
11524         break;
11525       }
11526     }
11527   }
11528 }
11529 
11530 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11531     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11532     : CGM(CGF.CGM),
11533       Action((CGM.getLangOpts().OpenMP >= 50 &&
11534               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11535                            [](const OMPLastprivateClause *C) {
11536                              return C->getKind() ==
11537                                     OMPC_LASTPRIVATE_conditional;
11538                            }))
11539                  ? ActionToDo::PushAsLastprivateConditional
11540                  : ActionToDo::DoNotPush) {
11541   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11542   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11543     return;
11544   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11545          "Expected a push action.");
11546   LastprivateConditionalData &Data =
11547       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11548   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11549     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11550       continue;
11551 
11552     for (const Expr *Ref : C->varlists()) {
11553       Data.DeclToUniqueName.insert(std::make_pair(
11554           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11555           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11556     }
11557   }
11558   Data.IVLVal = IVLVal;
11559   Data.Fn = CGF.CurFn;
11560 }
11561 
11562 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11563     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11564     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11565   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11566   if (CGM.getLangOpts().OpenMP < 50)
11567     return;
11568   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11569   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11570   if (!NeedToAddForLPCsAsDisabled.empty()) {
11571     Action = ActionToDo::DisableLastprivateConditional;
11572     LastprivateConditionalData &Data =
11573         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11574     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11575       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11576     Data.Fn = CGF.CurFn;
11577     Data.Disabled = true;
11578   }
11579 }
11580 
11581 CGOpenMPRuntime::LastprivateConditionalRAII
11582 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11583     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11584   return LastprivateConditionalRAII(CGF, S);
11585 }
11586 
11587 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11588   if (CGM.getLangOpts().OpenMP < 50)
11589     return;
11590   if (Action == ActionToDo::DisableLastprivateConditional) {
11591     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11592            "Expected list of disabled private vars.");
11593     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11594   }
11595   if (Action == ActionToDo::PushAsLastprivateConditional) {
11596     assert(
11597         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11598         "Expected list of lastprivate conditional vars.");
11599     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11600   }
11601 }
11602 
11603 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11604                                                         const VarDecl *VD) {
11605   ASTContext &C = CGM.getContext();
11606   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11607   if (I == LastprivateConditionalToTypes.end())
11608     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11609   QualType NewType;
11610   const FieldDecl *VDField;
11611   const FieldDecl *FiredField;
11612   LValue BaseLVal;
11613   auto VI = I->getSecond().find(VD);
11614   if (VI == I->getSecond().end()) {
11615     RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11616     RD->startDefinition();
11617     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11618     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11619     RD->completeDefinition();
11620     NewType = C.getRecordType(RD);
11621     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11622     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11623     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11624   } else {
11625     NewType = std::get<0>(VI->getSecond());
11626     VDField = std::get<1>(VI->getSecond());
11627     FiredField = std::get<2>(VI->getSecond());
11628     BaseLVal = std::get<3>(VI->getSecond());
11629   }
11630   LValue FiredLVal =
11631       CGF.EmitLValueForField(BaseLVal, FiredField);
11632   CGF.EmitStoreOfScalar(
11633       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11634       FiredLVal);
11635   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11636 }
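
// E.g. (illustrative): for 'int a' in 'lastprivate(conditional: a)' this
// builds struct { int a; char Fired; }, stores 0 to 'Fired', and returns
// the address of the 'a' field as the private copy.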
11637 
11638 namespace {
11639 /// Checks if the lastprivate conditional variable is referenced in LHS.
11640 class LastprivateConditionalRefChecker final
11641     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11642   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11643   const Expr *FoundE = nullptr;
11644   const Decl *FoundD = nullptr;
11645   StringRef UniqueDeclName;
11646   LValue IVLVal;
11647   llvm::Function *FoundFn = nullptr;
11648   SourceLocation Loc;
11649 
11650 public:
11651   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11652     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11653          llvm::reverse(LPM)) {
11654       auto It = D.DeclToUniqueName.find(E->getDecl());
11655       if (It == D.DeclToUniqueName.end())
11656         continue;
11657       if (D.Disabled)
11658         return false;
11659       FoundE = E;
11660       FoundD = E->getDecl()->getCanonicalDecl();
11661       UniqueDeclName = It->second;
11662       IVLVal = D.IVLVal;
11663       FoundFn = D.Fn;
11664       break;
11665     }
11666     return FoundE == E;
11667   }
11668   bool VisitMemberExpr(const MemberExpr *E) {
11669     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11670       return false;
11671     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11672          llvm::reverse(LPM)) {
11673       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11674       if (It == D.DeclToUniqueName.end())
11675         continue;
11676       if (D.Disabled)
11677         return false;
11678       FoundE = E;
11679       FoundD = E->getMemberDecl()->getCanonicalDecl();
11680       UniqueDeclName = It->second;
11681       IVLVal = D.IVLVal;
11682       FoundFn = D.Fn;
11683       break;
11684     }
11685     return FoundE == E;
11686   }
11687   bool VisitStmt(const Stmt *S) {
11688     for (const Stmt *Child : S->children()) {
11689       if (!Child)
11690         continue;
11691       if (const auto *E = dyn_cast<Expr>(Child))
11692         if (!E->isGLValue())
11693           continue;
11694       if (Visit(Child))
11695         return true;
11696     }
11697     return false;
11698   }
11699   explicit LastprivateConditionalRefChecker(
11700       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11701       : LPM(LPM) {}
11702   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11703   getFoundData() const {
11704     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11705   }
11706 };
11707 } // namespace
11708 
11709 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11710                                                        LValue IVLVal,
11711                                                        StringRef UniqueDeclName,
11712                                                        LValue LVal,
11713                                                        SourceLocation Loc) {
11714   // Last updated loop counter for the lastprivate conditional var.
11715   // int<xx> last_iv = 0;
11716   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11717   llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11718       LLIVTy, getName({UniqueDeclName, "iv"}));
11719   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11720       IVLVal.getAlignment().getAsAlign());
11721   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11722 
11723   // Last value of the lastprivate conditional.
11724   // decltype(priv_a) last_a;
11725   llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11726       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11727   Last->setAlignment(LVal.getAlignment().getAsAlign());
11728   LValue LastLVal = CGF.MakeAddrLValue(
11729       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
11730 
11731   // Global loop counter. Required to handle inner parallel-for regions.
11732   // iv
11733   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11734 
11735   // #pragma omp critical(a)
11736   // if (last_iv <= iv) {
11737   //   last_iv = iv;
11738   //   last_a = priv_a;
11739   // }
11740   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11741                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11742     Action.Enter(CGF);
11743     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11744     // (last_iv <= iv) ? Check whether the variable was updated and, if so,
11745     // store the new value in the global var.
11746     llvm::Value *CmpRes;
11747     if (IVLVal.getType()->isSignedIntegerType()) {
11748       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11749     } else {
11750       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11751              "Loop iteration variable must be integer.");
11752       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11753     }
11754     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11755     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11756     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11757     // {
11758     CGF.EmitBlock(ThenBB);
11759 
11760     //   last_iv = iv;
11761     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11762 
11763     //   last_a = priv_a;
11764     switch (CGF.getEvaluationKind(LVal.getType())) {
11765     case TEK_Scalar: {
11766       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11767       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11768       break;
11769     }
11770     case TEK_Complex: {
11771       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11772       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11773       break;
11774     }
11775     case TEK_Aggregate:
11776       llvm_unreachable(
11777           "Aggregates are not supported in lastprivate conditional.");
11778     }
11779     // }
11780     CGF.EmitBranch(ExitBB);
11781     // There is no need to emit a line number for the unconditional branch.
11782     (void)ApplyDebugLocation::CreateEmpty(CGF);
11783     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11784   };
11785 
11786   if (CGM.getLangOpts().OpenMPSimd) {
11787     // Do not emit as a critical region as no parallel region could be emitted.
11788     RegionCodeGenTy ThenRCG(CodeGen);
11789     ThenRCG(CGF);
11790   } else {
11791     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11792   }
11793 }
11794 
11795 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11796                                                          const Expr *LHS) {
11797   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11798     return;
11799   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11800   if (!Checker.Visit(LHS))
11801     return;
11802   const Expr *FoundE;
11803   const Decl *FoundD;
11804   StringRef UniqueDeclName;
11805   LValue IVLVal;
11806   llvm::Function *FoundFn;
11807   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11808       Checker.getFoundData();
11809   if (FoundFn != CGF.CurFn) {
11810     // Special codegen for inner parallel regions.
11811     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11812     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11813     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11814            "Lastprivate conditional is not found in outer region.");
11815     QualType StructTy = std::get<0>(It->getSecond());
11816     const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11817     LValue PrivLVal = CGF.EmitLValue(FoundE);
11818     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11819         PrivLVal.getAddress(CGF),
11820         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11821         CGF.ConvertTypeForMem(StructTy));
11822     LValue BaseLVal =
11823         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11824     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11825     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11826                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11827                         FiredLVal, llvm::AtomicOrdering::Unordered,
11828                         /*IsVolatile=*/true, /*isInit=*/false);
11829     return;
11830   }
11831 
11832   // Private address of the lastprivate conditional in the current context.
11833   // priv_a
11834   LValue LVal = CGF.EmitLValue(FoundE);
11835   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11836                                    FoundE->getExprLoc());
11837 }
11838 
11839 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11840     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11841     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11842   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11843     return;
11844   auto Range = llvm::reverse(LastprivateConditionalStack);
11845   auto It = llvm::find_if(
11846       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11847   if (It == Range.end() || It->Fn != CGF.CurFn)
11848     return;
11849   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11850   assert(LPCI != LastprivateConditionalToTypes.end() &&
11851          "Lastprivates must be registered already.");
11852   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11853   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11854   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11855   for (const auto &Pair : It->DeclToUniqueName) {
11856     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11857     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11858       continue;
11859     auto I = LPCI->getSecond().find(Pair.first);
11860     assert(I != LPCI->getSecond().end() &&
11861            "Lastprivate must be rehistered already.");
11862     // bool Cmp = priv_a.Fired != 0;
11863     LValue BaseLVal = std::get<3>(I->getSecond());
11864     LValue FiredLVal =
11865         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11866     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11867     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11868     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11869     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11870     // if (Cmp) {
11871     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11872     CGF.EmitBlock(ThenBB);
11873     Address Addr = CGF.GetAddrOfLocalVar(VD);
11874     LValue LVal;
11875     if (VD->getType()->isReferenceType())
11876       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11877                                            AlignmentSource::Decl);
11878     else
11879       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11880                                 AlignmentSource::Decl);
11881     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11882                                      D.getBeginLoc());
11883     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11884     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
11885     // }
11886   }
11887 }
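// Net effect per captured variable (sketch):
//   if (priv_a.Fired != 0) {
//     if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
//   }
// i.e. copies whose 'Fired' flag was set by an inner region receive the same
// guarded last-iteration update as direct writes.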
11888 
11889 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11890     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11891     SourceLocation Loc) {
11892   if (CGF.getLangOpts().OpenMP < 50)
11893     return;
11894   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11895   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11896          "Unknown lastprivate conditional variable.");
11897   StringRef UniqueName = It->second;
11898   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11899   // The variable was not updated in the region; nothing to copy back.
11900   if (!GV)
11901     return;
11902   LValue LPLVal = CGF.MakeAddrLValue(
11903       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
11904       PrivLVal.getType().getNonReferenceType());
11905   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11906   CGF.EmitStoreOfScalar(Res, PrivLVal);
11907 }
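// Final copy-out (sketch): once the construct finishes, the original variable
// takes the value recorded by the winning iteration, roughly 'a = last_a;'.
// The 'last_a' global is created lazily on first update, so its absence means
// no conditional store ever fired and the original value is left untouched.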
11908 
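// CGOpenMPSIMDRuntime backs SIMD-only mode (-fopenmp-simd), in which only
// 'simd'-style directives are honored and no calls into the OpenMP runtime
// library may be emitted. For example (illustrative), under -fopenmp-simd a
// '#pragma omp parallel for simd' keeps its simd semantics while the parallel
// part is ignored, so every entry point below that would need the runtime is
// unreachable by construction.
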
11909 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11910     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11911     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11912     const RegionCodeGenTy &CodeGen) {
11913   llvm_unreachable("Not supported in SIMD-only mode");
11914 }
11915 
11916 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11917     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11918     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11919     const RegionCodeGenTy &CodeGen) {
11920   llvm_unreachable("Not supported in SIMD-only mode");
11921 }
11922 
11923 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11924     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11925     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11926     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11927     bool Tied, unsigned &NumberOfParts) {
11928   llvm_unreachable("Not supported in SIMD-only mode");
11929 }
11930 
11931 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11932                                            SourceLocation Loc,
11933                                            llvm::Function *OutlinedFn,
11934                                            ArrayRef<llvm::Value *> CapturedVars,
11935                                            const Expr *IfCond,
11936                                            llvm::Value *NumThreads) {
11937   llvm_unreachable("Not supported in SIMD-only mode");
11938 }
11939 
11940 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11941     CodeGenFunction &CGF, StringRef CriticalName,
11942     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11943     const Expr *Hint) {
11944   llvm_unreachable("Not supported in SIMD-only mode");
11945 }
11946 
11947 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11948                                            const RegionCodeGenTy &MasterOpGen,
11949                                            SourceLocation Loc) {
11950   llvm_unreachable("Not supported in SIMD-only mode");
11951 }
11952 
11953 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11954                                            const RegionCodeGenTy &MasterOpGen,
11955                                            SourceLocation Loc,
11956                                            const Expr *Filter) {
11957   llvm_unreachable("Not supported in SIMD-only mode");
11958 }
11959 
11960 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11961                                             SourceLocation Loc) {
11962   llvm_unreachable("Not supported in SIMD-only mode");
11963 }
11964 
11965 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11966     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11967     SourceLocation Loc) {
11968   llvm_unreachable("Not supported in SIMD-only mode");
11969 }
11970 
11971 void CGOpenMPSIMDRuntime::emitSingleRegion(
11972     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11973     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11974     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11975     ArrayRef<const Expr *> AssignmentOps) {
11976   llvm_unreachable("Not supported in SIMD-only mode");
11977 }
11978 
11979 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11980                                             const RegionCodeGenTy &OrderedOpGen,
11981                                             SourceLocation Loc,
11982                                             bool IsThreads) {
11983   llvm_unreachable("Not supported in SIMD-only mode");
11984 }
11985 
11986 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11987                                           SourceLocation Loc,
11988                                           OpenMPDirectiveKind Kind,
11989                                           bool EmitChecks,
11990                                           bool ForceSimpleCall) {
11991   llvm_unreachable("Not supported in SIMD-only mode");
11992 }
11993 
11994 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11995     CodeGenFunction &CGF, SourceLocation Loc,
11996     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11997     bool Ordered, const DispatchRTInput &DispatchValues) {
11998   llvm_unreachable("Not supported in SIMD-only mode");
11999 }
12000 
12001 void CGOpenMPSIMDRuntime::emitForStaticInit(
12002     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12003     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12004   llvm_unreachable("Not supported in SIMD-only mode");
12005 }
12006 
12007 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12008     CodeGenFunction &CGF, SourceLocation Loc,
12009     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12010   llvm_unreachable("Not supported in SIMD-only mode");
12011 }
12012 
12013 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12014                                                      SourceLocation Loc,
12015                                                      unsigned IVSize,
12016                                                      bool IVSigned) {
12017   llvm_unreachable("Not supported in SIMD-only mode");
12018 }
12019 
12020 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12021                                               SourceLocation Loc,
12022                                               OpenMPDirectiveKind DKind) {
12023   llvm_unreachable("Not supported in SIMD-only mode");
12024 }
12025 
12026 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12027                                               SourceLocation Loc,
12028                                               unsigned IVSize, bool IVSigned,
12029                                               Address IL, Address LB,
12030                                               Address UB, Address ST) {
12031   llvm_unreachable("Not supported in SIMD-only mode");
12032 }
12033 
12034 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12035                                                llvm::Value *NumThreads,
12036                                                SourceLocation Loc) {
12037   llvm_unreachable("Not supported in SIMD-only mode");
12038 }
12039 
12040 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12041                                              ProcBindKind ProcBind,
12042                                              SourceLocation Loc) {
12043   llvm_unreachable("Not supported in SIMD-only mode");
12044 }
12045 
12046 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12047                                                     const VarDecl *VD,
12048                                                     Address VDAddr,
12049                                                     SourceLocation Loc) {
12050   llvm_unreachable("Not supported in SIMD-only mode");
12051 }
12052 
12053 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12054     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12055     CodeGenFunction *CGF) {
12056   llvm_unreachable("Not supported in SIMD-only mode");
12057 }
12058 
12059 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12060     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12061   llvm_unreachable("Not supported in SIMD-only mode");
12062 }
12063 
12064 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12065                                     ArrayRef<const Expr *> Vars,
12066                                     SourceLocation Loc,
12067                                     llvm::AtomicOrdering AO) {
12068   llvm_unreachable("Not supported in SIMD-only mode");
12069 }
12070 
12071 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12072                                        const OMPExecutableDirective &D,
12073                                        llvm::Function *TaskFunction,
12074                                        QualType SharedsTy, Address Shareds,
12075                                        const Expr *IfCond,
12076                                        const OMPTaskDataTy &Data) {
12077   llvm_unreachable("Not supported in SIMD-only mode");
12078 }
12079 
12080 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12081     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12082     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12083     const Expr *IfCond, const OMPTaskDataTy &Data) {
12084   llvm_unreachable("Not supported in SIMD-only mode");
12085 }
12086 
12087 void CGOpenMPSIMDRuntime::emitReduction(
12088     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12089     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12090     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12091   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12092   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12093                                  ReductionOps, Options);
12094 }
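// Only 'simple' reductions can reach this override, e.g. (illustrative):
//   #pragma omp simd reduction(+: sum)
// The base implementation lowers these to plain scalar combines with no
// runtime library calls, which is exactly what SIMD-only mode permits.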
12095 
12096 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12097     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12098     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12099   llvm_unreachable("Not supported in SIMD-only mode");
12100 }
12101 
12102 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12103                                                 SourceLocation Loc,
12104                                                 bool IsWorksharingReduction) {
12105   llvm_unreachable("Not supported in SIMD-only mode");
12106 }
12107 
12108 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12109                                                   SourceLocation Loc,
12110                                                   ReductionCodeGen &RCG,
12111                                                   unsigned N) {
12112   llvm_unreachable("Not supported in SIMD-only mode");
12113 }
12114 
12115 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12116                                                   SourceLocation Loc,
12117                                                   llvm::Value *ReductionsPtr,
12118                                                   LValue SharedLVal) {
12119   llvm_unreachable("Not supported in SIMD-only mode");
12120 }
12121 
12122 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12123                                            SourceLocation Loc,
12124                                            const OMPTaskDataTy &Data) {
12125   llvm_unreachable("Not supported in SIMD-only mode");
12126 }
12127 
12128 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12129     CodeGenFunction &CGF, SourceLocation Loc,
12130     OpenMPDirectiveKind CancelRegion) {
12131   llvm_unreachable("Not supported in SIMD-only mode");
12132 }
12133 
12134 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12135                                          SourceLocation Loc, const Expr *IfCond,
12136                                          OpenMPDirectiveKind CancelRegion) {
12137   llvm_unreachable("Not supported in SIMD-only mode");
12138 }
12139 
12140 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12141     const OMPExecutableDirective &D, StringRef ParentName,
12142     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12143     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12144   llvm_unreachable("Not supported in SIMD-only mode");
12145 }
12146 
12147 void CGOpenMPSIMDRuntime::emitTargetCall(
12148     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12149     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12150     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12151     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12152                                      const OMPLoopDirective &D)>
12153         SizeEmitter) {
12154   llvm_unreachable("Not supported in SIMD-only mode");
12155 }
12156 
12157 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12158   llvm_unreachable("Not supported in SIMD-only mode");
12159 }
12160 
12161 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12162   llvm_unreachable("Not supported in SIMD-only mode");
12163 }
12164 
12165 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12166   return false;
12167 }
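// Returning false (rather than trapping) tells the caller this global needs
// no device-specific handling, so normal host codegen proceeds as usual.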
12168 
12169 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12170                                         const OMPExecutableDirective &D,
12171                                         SourceLocation Loc,
12172                                         llvm::Function *OutlinedFn,
12173                                         ArrayRef<llvm::Value *> CapturedVars) {
12174   llvm_unreachable("Not supported in SIMD-only mode");
12175 }
12176 
12177 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12178                                              const Expr *NumTeams,
12179                                              const Expr *ThreadLimit,
12180                                              SourceLocation Loc) {
12181   llvm_unreachable("Not supported in SIMD-only mode");
12182 }
12183 
12184 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12185     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12186     const Expr *Device, const RegionCodeGenTy &CodeGen,
12187     CGOpenMPRuntime::TargetDataInfo &Info) {
12188   llvm_unreachable("Not supported in SIMD-only mode");
12189 }
12190 
12191 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12192     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12193     const Expr *Device) {
12194   llvm_unreachable("Not supported in SIMD-only mode");
12195 }
12196 
12197 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12198                                            const OMPLoopDirective &D,
12199                                            ArrayRef<Expr *> NumIterations) {
12200   llvm_unreachable("Not supported in SIMD-only mode");
12201 }
12202 
12203 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12204                                               const OMPDependClause *C) {
12205   llvm_unreachable("Not supported in SIMD-only mode");
12206 }
12207 
12208 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12209                                               const OMPDoacrossClause *C) {
12210   llvm_unreachable("Not supported in SIMD-only mode");
12211 }
12212 
12213 const VarDecl *
12214 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12215                                         const VarDecl *NativeParam) const {
12216   llvm_unreachable("Not supported in SIMD-only mode");
12217 }
12218 
12219 Address
12220 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12221                                          const VarDecl *NativeParam,
12222                                          const VarDecl *TargetParam) const {
12223   llvm_unreachable("Not supported in SIMD-only mode");
12224 }
12225