xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 02e9120893770924227138ba49df1edb3896112a)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 #include <optional>
46 
47 using namespace clang;
48 using namespace CodeGen;
49 using namespace llvm::omp;
50 
51 namespace {
52 /// Base class for handling code generation inside OpenMP regions.
   ///
   /// Stores the region kind, the code generation callback, the directive kind
   /// and the cancel flag supplied at construction and exposes them through the
   /// accessors below. Derived classes must implement getThreadIDVariable().
53 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
54 public:
55   /// Kinds of OpenMP regions used in codegen.
56   enum CGOpenMPRegionKind {
57     /// Region with outlined function for standalone 'parallel'
58     /// directive.
59     ParallelOutlinedRegion,
60     /// Region with outlined function for standalone 'task' directive.
61     TaskOutlinedRegion,
62     /// Region for constructs that do not require function outlining,
63     /// like 'for', 'sections', 'atomic' etc. directives.
64     InlinedRegion,
65     /// Region with outlined function for standalone 'target' directive.
66     TargetRegion,
67   };
68 
     /// Create region info bound to the captured statement \p CS.
     /// \param RegionKind Value later returned by getRegionKind().
     /// \param CodeGen Code generation callback stored for the region.
     /// \param Kind Value later returned by getDirectiveKind().
     /// \param HasCancel Value later returned by hasCancel().
69   CGOpenMPRegionInfo(const CapturedStmt &CS,
70                      const CGOpenMPRegionKind RegionKind,
71                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
72                      bool HasCancel)
73       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
74         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
75 
     /// Create region info that is not bound to a captured statement; the
     /// remaining parameters have the same meaning as in the constructor above.
76   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
77                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
78                      bool HasCancel)
79       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
80         Kind(Kind), HasCancel(HasCancel) {}
81 
82   /// Get a variable or parameter for storing global thread id
83   /// inside OpenMP construct.
84   virtual const VarDecl *getThreadIDVariable() const = 0;
85 
86   /// Emit the captured statement body.
87   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
88 
89   /// Get an LValue for the current ThreadID variable.
90   /// \return LValue for thread id variable. This LValue always has type int32*.
91   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
92 
     /// Hook for emitting a switching point of an untied task. The base
     /// implementation intentionally does nothing.
93   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
94 
     /// Get the region kind given at construction.
95   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
96 
     /// Get the directive kind given at construction.
97   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
98 
     /// Get the cancel flag given at construction.
99   bool hasCancel() const { return HasCancel; }
100 
      /// LLVM-style RTTI support: matches every captured-statement info whose
      /// kind is CR_OpenMP.
101   static bool classof(const CGCapturedStmtInfo *Info) {
102     return Info->getKind() == CR_OpenMP;
103   }
104 
105   ~CGOpenMPRegionInfo() override = default;
106 
107 protected:
      /// Kind of this region; used by the classof() checks of derived classes.
108   CGOpenMPRegionKind RegionKind;
      /// Code generation callback for the region.
109   RegionCodeGenTy CodeGen;
      /// Directive kind associated with the region.
110   OpenMPDirectiveKind Kind;
      /// Cancel flag associated with the region.
111   bool HasCancel;
112 };
113 
114 /// API for captured statement code generation in OpenMP constructs.
115 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
116 public:
117   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
118                              const RegionCodeGenTy &CodeGen,
119                              OpenMPDirectiveKind Kind, bool HasCancel,
120                              StringRef HelperName)
121       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
122                            HasCancel),
123         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
124     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
125   }
126 
127   /// Get a variable or parameter for storing global thread id
128   /// inside OpenMP construct.
129   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
130 
131   /// Get the name of the capture helper.
132   StringRef getHelperName() const override { return HelperName; }
133 
134   static bool classof(const CGCapturedStmtInfo *Info) {
135     return CGOpenMPRegionInfo::classof(Info) &&
136            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
137                ParallelOutlinedRegion;
138   }
139 
140 private:
141   /// A variable or parameter storing global thread id for OpenMP
142   /// constructs.
143   const VarDecl *ThreadIDVar;
144   StringRef HelperName;
145 };
146 
147 /// API for captured statement code generation in OpenMP constructs.
    ///
    /// Region info for task regions (kind TaskOutlinedRegion). Untied-task
    /// switching points are delegated to the UntiedTaskActionTy passed in.
148 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
149 public:
      /// Pre/post action that emits the part-id switch used by untied tasks.
      /// All emission is skipped when the action is constructed with
      /// \c Tied == true.
150   class UntiedTaskActionTy final : public PrePostActionTy {
        /// True when the task is untied (negation of the constructor's \c Tied).
151     bool Untied;
        /// Pointer-typed variable through which the part id is loaded/stored.
152     const VarDecl *PartIDVar;
        /// Callback run at every switching point, right after the part id store.
153     const RegionCodeGenTy UntiedCodeGen;
        /// Switch over the part id; created in Enter(), null until then.
154     llvm::SwitchInst *UntiedSwitch = nullptr;
155 
156   public:
157     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
158                        const RegionCodeGenTy &UntiedCodeGen)
159         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
        /// Emits the initial dispatch for an untied task: loads the part id,
        /// switches on it (default destination ".untied.done.", which branches
        /// through cleanups to the return block), registers a fresh
        /// ".untied.jmp." block as case 0 and then emits the first switching
        /// point via emitUntiedSwitch().
160     void Enter(CodeGenFunction &CGF) override {
161       if (Untied) {
162         // Emit task switching point.
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         llvm::Value *Res =
167             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
168         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
169         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
170         CGF.EmitBlock(DoneBB);
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
174                               CGF.Builder.GetInsertBlock());
175         emitUntiedSwitch(CGF);
176       }
177     }
        /// Emits one switching point: stores the current number of switch cases
        /// as the part id, runs UntiedCodeGen, branches to the return block,
        /// then opens a new ".untied.jmp." block registered as the next switch
        /// case and continues emission at the ".untied.next." destination.
        /// Uses UntiedSwitch, so Enter() must have run first for untied tasks.
178     void emitUntiedSwitch(CodeGenFunction &CGF) const {
179       if (Untied) {
180         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
181             CGF.GetAddrOfLocalVar(PartIDVar),
182             PartIDVar->getType()->castAs<PointerType>());
183         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
184                               PartIdLVal);
185         UntiedCodeGen(CGF);
186         CodeGenFunction::JumpDest CurPoint =
187             CGF.getJumpDestInCurrentScope(".untied.next.");
188         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
189         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
190         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
191                               CGF.Builder.GetInsertBlock());
192         CGF.EmitBranchThroughCleanup(CurPoint);
193         CGF.EmitBlock(CurPoint.getBlock());
194       }
195     }
        /// Number of cases currently registered on the switch. Dereferences
        /// UntiedSwitch unconditionally, which is only set by Enter() for
        /// untied tasks.
196     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
197   };
      /// \param ThreadIDVar Variable or parameter holding the global thread id;
      /// must not be null.
      /// \param Action Untied-task action; only a reference is stored, so the
      /// referenced object has to stay alive while this info is in use.
198   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
199                                  const VarDecl *ThreadIDVar,
200                                  const RegionCodeGenTy &CodeGen,
201                                  OpenMPDirectiveKind Kind, bool HasCancel,
202                                  const UntiedTaskActionTy &Action)
203       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
204         ThreadIDVar(ThreadIDVar), Action(Action) {
205     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
206   }
207 
208   /// Get a variable or parameter for storing global thread id
209   /// inside OpenMP construct.
210   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
211 
212   /// Get an LValue for the current ThreadID variable.
213   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
214 
215   /// Get the name of the capture helper.
216   StringRef getHelperName() const override { return ".omp_outlined."; }
217 
      /// Emit an untied-task switching point by forwarding to the stored action.
218   void emitUntiedSwitch(CodeGenFunction &CGF) override {
219     Action.emitUntiedSwitch(CGF);
220   }
221 
      /// LLVM-style RTTI support: true only for OpenMP region infos of kind
      /// TaskOutlinedRegion.
222   static bool classof(const CGCapturedStmtInfo *Info) {
223     return CGOpenMPRegionInfo::classof(Info) &&
224            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
225                TaskOutlinedRegion;
226   }
227 
228 private:
229   /// A variable or parameter storing global thread id for OpenMP
230   /// constructs.
231   const VarDecl *ThreadIDVar;
232   /// Action for emitting code for untied tasks.
233   const UntiedTaskActionTy &Action;
234 };
235 
236 /// API for inlined captured statement code generation in OpenMP
237 /// constructs.
    ///
    /// Remembers the captured-statement info that was active before this region
    /// (OldCSI) and, when that info is itself an OpenMP region info, forwards
    /// most queries to it (OuterRegionInfo).
238 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
239 public:
      /// \param OldCSI Previously active captured-statement info; may be null.
      /// OuterRegionInfo is derived from it with dyn_cast_or_null, so it is null
      /// when \p OldCSI is null or is not an OpenMP region info.
240   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
241                             const RegionCodeGenTy &CodeGen,
242                             OpenMPDirectiveKind Kind, bool HasCancel)
243       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
244         OldCSI(OldCSI),
245         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
246 
247   // Retrieve the value of the context parameter.
      // Only valid when there is an outer OpenMP region; unreachable otherwise.
248   llvm::Value *getContextValue() const override {
249     if (OuterRegionInfo)
250       return OuterRegionInfo->getContextValue();
251     llvm_unreachable("No context value for inlined OpenMP region");
252   }
253 
      /// Set the context value on the outer OpenMP region; unreachable when
      /// there is none.
254   void setContextValue(llvm::Value *V) override {
255     if (OuterRegionInfo) {
256       OuterRegionInfo->setContextValue(V);
257       return;
258     }
259     llvm_unreachable("No context value for inlined OpenMP region");
260   }
261 
262   /// Lookup the captured field decl for a variable.
263   const FieldDecl *lookup(const VarDecl *VD) const override {
264     if (OuterRegionInfo)
265       return OuterRegionInfo->lookup(VD);
266     // If there is no outer outlined region, no need to lookup in a list of
267     // captured variables, we can use the original one.
268     return nullptr;
269   }
270 
      /// Get the field capturing 'this' from the outer OpenMP region, or null
      /// when there is no outer OpenMP region.
271   FieldDecl *getThisFieldDecl() const override {
272     if (OuterRegionInfo)
273       return OuterRegionInfo->getThisFieldDecl();
274     return nullptr;
275   }
276 
277   /// Get a variable or parameter for storing global thread id
278   /// inside OpenMP construct.
      /// Returns null when there is no outer OpenMP region.
279   const VarDecl *getThreadIDVariable() const override {
280     if (OuterRegionInfo)
281       return OuterRegionInfo->getThreadIDVariable();
282     return nullptr;
283   }
284 
285   /// Get an LValue for the current ThreadID variable.
      /// Unreachable when there is no outer OpenMP region.
286   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
287     if (OuterRegionInfo)
288       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
289     llvm_unreachable("No LValue for inlined OpenMP construct");
290   }
291 
292   /// Get the name of the capture helper.
      /// Unlike the other forwarders this one consults OldCSI (any kind of
      /// captured-statement info); the local variable below shadows the
      /// OuterRegionInfo member.
293   StringRef getHelperName() const override {
294     if (auto *OuterRegionInfo = getOldCSI())
295       return OuterRegionInfo->getHelperName();
296     llvm_unreachable("No helper name for inlined OpenMP construct");
297   }
298 
      /// Forward the untied-task switching point to the outer OpenMP region, if
      /// any; otherwise do nothing.
299   void emitUntiedSwitch(CodeGenFunction &CGF) override {
300     if (OuterRegionInfo)
301       OuterRegionInfo->emitUntiedSwitch(CGF);
302   }
303 
      /// Get the captured-statement info that was active before this region.
304   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
305 
      /// LLVM-style RTTI support: true only for OpenMP region infos of kind
      /// InlinedRegion.
306   static bool classof(const CGCapturedStmtInfo *Info) {
307     return CGOpenMPRegionInfo::classof(Info) &&
308            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
309   }
310 
311   ~CGOpenMPInlinedRegionInfo() override = default;
312 
313 private:
314   /// CodeGen info about outer OpenMP region.
315   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
      /// OldCSI viewed as an OpenMP region info, or null if it is not one.
316   CGOpenMPRegionInfo *OuterRegionInfo;
317 };
318 
319 /// API for captured statement code generation in OpenMP target
320 /// constructs. For this captures, implicit parameters are used instead of the
321 /// captured fields. The name of the target region has to be unique in a given
322 /// application so it is provided by the client, because only the client has
323 /// the information to generate that.
324 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
325 public:
326   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
327                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
328       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
329                            /*HasCancel=*/false),
330         HelperName(HelperName) {}
331 
332   /// This is unused for target regions because each starts executing
333   /// with a single thread.
334   const VarDecl *getThreadIDVariable() const override { return nullptr; }
335 
336   /// Get the name of the capture helper.
337   StringRef getHelperName() const override { return HelperName; }
338 
339   static bool classof(const CGCapturedStmtInfo *Info) {
340     return CGOpenMPRegionInfo::classof(Info) &&
341            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
342   }
343 
344 private:
345   StringRef HelperName;
346 };
347 
/// Placeholder code generation callback that must never be invoked; it is
/// passed to CGOpenMPInlinedRegionInfo by CGOpenMPInnerExprInfo.
348 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
349   llvm_unreachable("No codegen for expressions");
350 }
351 /// API for generation of expressions captured in an innermost OpenMP
352 /// region.
    ///
    /// Behaves like an inlined region chained to CGF.CapturedStmtInfo, but has
    /// no body, thread id or helper name of its own: those queries are
    /// unreachable.
353 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
354 public:
      /// \param CGF Function being emitted; its current CapturedStmtInfo becomes
      /// the outer info of this region.
      /// \param CS Captured statement whose captures are inspected.
355   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
356       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
357                                   OMPD_unknown,
358                                   /*HasCancel=*/false),
359         PrivScope(CGF) {
360     // Make sure the globals captured in the provided statement are local by
361     // using the privatization logic. We assume the same variable is not
362     // captured more than once.
363     for (const auto &C : CS.captures()) {
          // Only variable captures (by reference or by copy) are of interest.
364       if (!C.capturesVariable() && !C.capturesVariableByCopy())
365         continue;
366 
          // Locals and parameters are skipped; only the remaining (non-local)
          // variables are registered.
367       const VarDecl *VD = C.getCapturedVar();
368       if (VD->isLocalVarDeclOrParm())
369         continue;
370 
          // Build a temporary reference to the variable and register the
          // address of its emitted lvalue as the private copy.
371       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
372                       /*RefersToEnclosingVariableOrCapture=*/false,
373                       VD->getType().getNonReferenceType(), VK_LValue,
374                       C.getLocation());
375       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
376     }
377     (void)PrivScope.Privatize();
378   }
379 
380   /// Lookup the captured field decl for a variable.
      /// Returns exactly what CGOpenMPInlinedRegionInfo::lookup() returns
      /// (null when nothing is found).
381   const FieldDecl *lookup(const VarDecl *VD) const override {
382     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
383       return FD;
384     return nullptr;
385   }
386 
387   /// Emit the captured statement body.
      /// Must not be called: expressions have no body.
388   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
389     llvm_unreachable("No body for expressions");
390   }
391 
392   /// Get a variable or parameter for storing global thread id
393   /// inside OpenMP construct.
      /// Must not be called for expressions.
394   const VarDecl *getThreadIDVariable() const override {
395     llvm_unreachable("No thread id for expressions");
396   }
397 
398   /// Get the name of the capture helper.
      /// Must not be called for expressions.
399   StringRef getHelperName() const override {
400     llvm_unreachable("No helper name for expressions");
401   }
402 
      /// LLVM-style RTTI support: always false, so isa/dyn_cast never identify
      /// an object as this class.
403   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
404 
405 private:
406   /// Private scope to capture global variables.
407   CodeGenFunction::OMPPrivateScope PrivScope;
408 };
409 
410 /// RAII for emitting code of OpenMP constructs.
    ///
    /// The constructor installs a new CGOpenMPInlinedRegionInfo as
    /// CGF.CapturedStmtInfo (chained to the previous one); the destructor deletes
    /// it and restores the previous info. With NoInheritance set, the lambda
    /// capture state and block info of \c CGF are also hidden for the lifetime of
    /// this object and restored afterwards.
411 class InlinedOpenMPRegionRAII {
412   CodeGenFunction &CGF;
      // Storage for the state taken from CGF while NoInheritance is in effect.
413   llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
414   FieldDecl *LambdaThisCaptureField = nullptr;
415   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
416   bool NoInheritance = false;
417 
418 public:
419   /// Constructs region for combined constructs.
420   /// \param CodeGen Code generation sequence for combined directives. Includes
421   /// a list of functions used for code generation of implicitly inlined
422   /// regions.
      /// \param Kind Directive kind passed to the new region info.
      /// \param HasCancel Cancel flag passed to the new region info.
      /// \param NoInheritance When true (the default), CGF's lambda capture
      /// fields, lambda 'this' capture field and block info are saved here and
      /// cleared on \p CGF until destruction.
423   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
424                           OpenMPDirectiveKind Kind, bool HasCancel,
425                           bool NoInheritance = true)
426       : CGF(CGF), NoInheritance(NoInheritance) {
427     // Start emission for the construct.
428     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
429         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
430     if (NoInheritance) {
          // The member map starts out empty, so the swap leaves CGF with an
          // empty map and keeps the original contents here.
431       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
432       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
433       CGF.LambdaThisCaptureField = nullptr;
434       BlockInfo = CGF.BlockInfo;
435       CGF.BlockInfo = nullptr;
436     }
437   }
438 
439   ~InlinedOpenMPRegionRAII() {
440     // Restore original CapturedStmtInfo only if we're done with code emission.
        // The old info must be read before the current one is deleted.
441     auto *OldCSI =
442         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
443     delete CGF.CapturedStmtInfo;
444     CGF.CapturedStmtInfo = OldCSI;
445     if (NoInheritance) {
          // Put back the state that the constructor took away from CGF.
446       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
447       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
448       CGF.BlockInfo = BlockInfo;
449     }
450   }
451 };
452 
453 /// Values for bit flags used in the ident_t to describe the fields.
454 /// All enumerators are named and described in accordance with the code
455 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
    ///
    /// Note that OMP_IDENT_BARRIER_IMPL and OMP_IDENT_BARRIER_IMPL_FOR share the
    /// value 0x40, and that the 'sections' (0xC0) and 'single' (0x140) variants
    /// include that 0x40 bit as well.
456 enum OpenMPLocationFlags : unsigned {
457   /// Use trampoline for internal microtask.
458   OMP_IDENT_IMD = 0x01,
459   /// Use c-style ident structure.
460   OMP_IDENT_KMPC = 0x02,
461   /// Atomic reduction option for kmpc_reduce.
462   OMP_ATOMIC_REDUCE = 0x10,
463   /// Explicit 'barrier' directive.
464   OMP_IDENT_BARRIER_EXPL = 0x20,
465   /// Implicit barrier in code.
466   OMP_IDENT_BARRIER_IMPL = 0x40,
467   /// Implicit barrier in 'for' directive.
468   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
469   /// Implicit barrier in 'sections' directive.
470   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
471   /// Implicit barrier in 'single' directive.
472   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
473   /// Call of __kmp_for_static_init for static loop.
474   OMP_IDENT_WORK_LOOP = 0x200,
475   /// Call of __kmp_for_static_init for sections.
476   OMP_IDENT_WORK_SECTIONS = 0x400,
477   /// Call of __kmp_for_static_init for distribute.
478   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
      // Marks the enum as a bitmask enum; the largest flag value is given here.
479   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
480 };
481 
    // Nested anonymous namespace that enables the bitmask-enum operators for
    // the enums declared inside it.
482 namespace {
483 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
484 /// Values for bit flags for marking which requires clauses have been used.
    /// Each clause occupies its own bit; the underlying type is int64_t.
485 enum OpenMPOffloadingRequiresDirFlags : int64_t {
486   /// flag undefined.
487   OMP_REQ_UNDEFINED               = 0x000,
488   /// no requires clause present.
489   OMP_REQ_NONE                    = 0x001,
490   /// reverse_offload clause.
491   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
492   /// unified_address clause.
493   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
494   /// unified_shared_memory clause.
495   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
496   /// dynamic_allocators clause.
497   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
      // Marks the enum as a bitmask enum; the largest flag value is given here.
498   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
499 };
500 
501 } // anonymous namespace
502 
503 /// Describes ident structure that describes a source location.
504 /// All descriptions are taken from
505 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
506 /// Original structure:
507 /// typedef struct ident {
508 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
509 ///                                  see above  */
510 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
511 ///                                  KMP_IDENT_KMPC identifies this union
512 ///                                  member  */
513 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
514 ///                                  see above */
515 ///#if USE_ITT_BUILD
516 ///                            /*  but currently used for storing
517 ///                                region-specific ITT */
518 ///                            /*  contextual information. */
519 ///#endif /* USE_ITT_BUILD */
520 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
521 ///                                 C++  */
522 ///    char const *psource;    /**< String describing the source location.
523 ///                            The string is composed of semi-colon separated
524 ///                            fields which describe the source file,
525 ///                            the function and a pair of line numbers that
526 ///                            delimit the construct.
527 ///                             */
528 /// } ident_t;
    ///
    /// The enumerators below are listed in the same order as the fields of the
    /// structure quoted above, so each one is the index of its field.
529 enum IdentFieldIndex {
530   /// might be used in Fortran
531   IdentField_Reserved_1,
532   /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
533   IdentField_Flags,
534   /// Not really used in Fortran any more
535   IdentField_Reserved_2,
536   /// Source[4] in Fortran, do not use for C++
537   IdentField_Reserved_3,
538   /// String describing the source location. The string is composed of
539   /// semi-colon separated fields which describe the source file, the function
540   /// and a pair of line numbers that delimit the construct.
541   IdentField_PSource
542 };
543 
544 /// Schedule types for 'omp for' loops (these enumerators are taken from
545 /// the enum sched_type in kmp.h).
    ///
    /// Each 'ordered' value equals the matching unordered value plus 32. The two
    /// schedule modifiers are single high bits (29 and 30) and do not overlap
    /// with any of the schedule values.
546 enum OpenMPSchedType {
547   /// Lower bound for default (unordered) versions.
548   OMP_sch_lower = 32,
549   OMP_sch_static_chunked = 33,
550   OMP_sch_static = 34,
551   OMP_sch_dynamic_chunked = 35,
552   OMP_sch_guided_chunked = 36,
553   OMP_sch_runtime = 37,
554   OMP_sch_auto = 38,
555   /// static with chunk adjustment (e.g., simd)
556   OMP_sch_static_balanced_chunked = 45,
557   /// Lower bound for 'ordered' versions.
558   OMP_ord_lower = 64,
559   OMP_ord_static_chunked = 65,
560   OMP_ord_static = 66,
561   OMP_ord_dynamic_chunked = 67,
562   OMP_ord_guided_chunked = 68,
563   OMP_ord_runtime = 69,
564   OMP_ord_auto = 70,
      /// Default schedule: an alias for OMP_sch_static.
565   OMP_sch_default = OMP_sch_static,
566   /// dist_schedule types
567   OMP_dist_sch_static_chunked = 91,
568   OMP_dist_sch_static = 92,
569   /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
570   /// Set if the monotonic schedule modifier was present.
571   OMP_sch_modifier_monotonic = (1 << 29),
572   /// Set if the nonmonotonic schedule modifier was present.
573   OMP_sch_modifier_nonmonotonic = (1 << 30),
574 };
575 
576 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
577 /// region.
578 class CleanupTy final : public EHScopeStack::Cleanup {
579   PrePostActionTy *Action;
580 
581 public:
582   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
583   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
584     if (!CGF.HaveInsertPoint())
585       return;
586     Action->Exit(CGF);
587   }
588 };
589 
590 } // anonymous namespace
591 
592 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
593   CodeGenFunction::RunCleanupsScope Scope(CGF);
594   if (PrePostAction) {
595     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
596     Callback(CodeGen, CGF, *PrePostAction);
597   } else {
598     PrePostActionTy Action;
599     Callback(CodeGen, CGF, Action);
600   }
601 }
602 
603 /// Check if the combiner is a call to UDR combiner and if it is so return the
604 /// UDR decl used for reduction.
605 static const OMPDeclareReductionDecl *
606 getReductionInit(const Expr *ReductionOp) {
607   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
608     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
609       if (const auto *DRE =
610               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
611         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
612           return DRD;
613   return nullptr;
614 }
615 
/// Emit the initialization of the private reduction copy at \p Private.
///
/// If \p DRD has an initializer, \p InitOp (a call through an opaque callee
/// taking the addresses of two variables) is emitted with the callee mapped to
/// the second function of the user-defined reduction pair, the first argument's
/// variable mapped to \p Private and the second one's mapped to \p Original.
/// Otherwise \p Private is initialized from a private constant global that
/// holds the null constant of \p Ty.
616 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
617                                              const OMPDeclareReductionDecl *DRD,
618                                              const Expr *InitOp,
619                                              Address Private, Address Original,
620                                              QualType Ty) {
621   if (DRD->getInitializer()) {
622     std::pair<llvm::Function *, llvm::Function *> Reduction =
623         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
624     const auto *CE = cast<CallExpr>(InitOp);
625     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
        // Both call arguments are expected to be unary operators applied to a
        // plain variable reference; the casts below assert that shape.
626     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
627     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
628     const auto *LHSDRE =
629         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
630     const auto *RHSDRE =
631         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
        // Redirect the two placeholder variables to the actual storage.
632     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
633     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
634     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
635     (void)PrivateScope.Privatize();
        // Bind the opaque callee to the second function of the pair and emit
        // the call, discarding its result.
636     RValue Func = RValue::get(Reduction.second);
637     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
638     CGF.EmitIgnoredExpr(InitOp);
639   } else {
        // No initializer: materialize a null constant of the type in a private
        // constant global and copy it into the private storage.
640     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
641     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
642     auto *GV = new llvm::GlobalVariable(
643         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
644         llvm::GlobalValue::PrivateLinkage, Init, Name);
645     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
646     RValue InitRVal;
647     switch (CGF.getEvaluationKind(Ty)) {
648     case TEK_Scalar:
649       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
650       break;
651     case TEK_Complex:
652       InitRVal =
653           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
654       break;
655     case TEK_Aggregate: {
          // Aggregates are copied from the global's lvalue directly, so this
          // case finishes here instead of going through InitRVal.
656       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
657       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
658       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                            /*IsInitializer=*/false);
660       return;
661     }
662     }
        // Scalar and complex values: store the loaded value into Private via a
        // prvalue opaque expression.
663     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
664     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
665     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
666                          /*IsInitializer=*/false);
667   }
668 }
669 
670 /// Emit initialization of arrays of complex types.
    ///
    /// Emits a loop that initializes the destination array element by element.
    /// When \p DRD is non-null a source pointer is advanced in lock step with the
    /// destination pointer.
671 /// \param DestAddr Address of the array.
672 /// \param Type Type of array.
    /// \param EmitDeclareReductionInit If true, every element is initialized with
    /// emitInitWithReductionInitializer(); otherwise \p Init is emitted into the
    /// element.
673 /// \param Init Initial expression of array.
    /// \param DRD User-defined reduction declaration, or null. A non-null value
    /// enables the use of \p SrcAddr.
674 /// \param SrcAddr Address of the original array.
675 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
676                                  QualType Type, bool EmitDeclareReductionInit,
677                                  const Expr *Init,
678                                  const OMPDeclareReductionDecl *DRD,
679                                  Address SrcAddr = Address::invalid()) {
680   // Perform element-by-element initialization.
681   QualType ElementTy;
682 
683   // Drill down to the base element type on both arrays.
      // emitArrayLength also updates DestAddr and ElementTy (both are read
      // afterwards as the element address and element type).
684   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
685   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
686   if (DRD)
687     SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
688 
689   llvm::Value *SrcBegin = nullptr;
690   if (DRD)
691     SrcBegin = SrcAddr.getPointer();
692   llvm::Value *DestBegin = DestAddr.getPointer();
693   // Cast from pointer to array type to pointer to single element.
694   llvm::Value *DestEnd =
695       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
696   // The basic structure here is a while-do loop.
697   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
698   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
      // Skip the loop entirely when the array has no elements.
699   llvm::Value *IsEmpty =
700       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
701   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
702 
703   // Enter the loop body, making that address the current address.
704   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
705   CGF.EmitBlock(BodyBB);
706 
707   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
708 
      // PHI nodes track the current source (only with DRD) and destination
      // elements; their first incoming value comes from the entry block.
709   llvm::PHINode *SrcElementPHI = nullptr;
710   Address SrcElementCurrent = Address::invalid();
711   if (DRD) {
712     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
713                                           "omp.arraycpy.srcElementPast");
714     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
715     SrcElementCurrent =
716         Address(SrcElementPHI, SrcAddr.getElementType(),
717                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
718   }
719   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
720       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
721   DestElementPHI->addIncoming(DestBegin, EntryBB);
722   Address DestElementCurrent =
723       Address(DestElementPHI, DestAddr.getElementType(),
724               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
725 
726   // Emit copy.
727   {
        // Cleanups created while initializing one element are run at the end
        // of this scope, i.e. once per loop iteration.
728     CodeGenFunction::RunCleanupsScope InitScope(CGF);
729     if (EmitDeclareReductionInit) {
730       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
731                                        SrcElementCurrent, ElementTy);
732     } else
733       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
734                            /*IsInitializer=*/false);
735   }
736 
737   if (DRD) {
738     // Shift the address forward by one element.
        // NOTE(review): the source GEP reuses the value name
        // "omp.arraycpy.dest.element"; this looks like a copy of the
        // destination name below -- confirm before renaming, since it is an
        // emitted IR value name.
739     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
740         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
741         "omp.arraycpy.dest.element");
742     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
743   }
744 
745   // Shift the address forward by one element.
746   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
747       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
748       "omp.arraycpy.dest.element");
749   // Check whether we've reached the end.
750   llvm::Value *Done =
751       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
752   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
      // The back edge comes from the block that ends the loop body.
753   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
754 
755   // Done.
756   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
757 }
758 
759 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
760   return CGF.EmitOMPSharedLValue(E);
761 }
762 
763 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
764                                             const Expr *E) {
765   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
766     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
767   return LValue();
768 }
769 
770 void ReductionCodeGen::emitAggregateInitialization(
771     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
772     const OMPDeclareReductionDecl *DRD) {
773   // Emit VarDecl with copy init for arrays.
774   // Get the address of the original variable captured in current
775   // captured region.
776   const auto *PrivateVD =
777       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
778   bool EmitDeclareReductionInit =
779       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
780   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
781                        EmitDeclareReductionInit,
782                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
783                                                 : PrivateVD->getInit(),
784                        DRD, SharedAddr);
785 }
786 
787 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
788                                    ArrayRef<const Expr *> Origs,
789                                    ArrayRef<const Expr *> Privates,
790                                    ArrayRef<const Expr *> ReductionOps) {
791   ClausesData.reserve(Shareds.size());
792   SharedAddresses.reserve(Shareds.size());
793   Sizes.reserve(Shareds.size());
794   BaseDecls.reserve(Shareds.size());
795   const auto *IOrig = Origs.begin();
796   const auto *IPriv = Privates.begin();
797   const auto *IRed = ReductionOps.begin();
798   for (const Expr *Ref : Shareds) {
799     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
800     std::advance(IOrig, 1);
801     std::advance(IPriv, 1);
802     std::advance(IRed, 1);
803   }
804 }
805 
806 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
807   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
808          "Number of generated lvalues must be exactly N.");
809   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
810   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
811   SharedAddresses.emplace_back(First, Second);
812   if (ClausesData[N].Shared == ClausesData[N].Ref) {
813     OrigAddresses.emplace_back(First, Second);
814   } else {
815     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
816     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
817     OrigAddresses.emplace_back(First, Second);
818   }
819 }
820 
821 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
822   QualType PrivateType = getPrivateType(N);
823   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
824   if (!PrivateType->isVariablyModifiedType()) {
825     Sizes.emplace_back(
826         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
827         nullptr);
828     return;
829   }
830   llvm::Value *Size;
831   llvm::Value *SizeInChars;
832   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
833   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
834   if (AsArraySection) {
835     Size = CGF.Builder.CreatePtrDiff(ElemType,
836                                      OrigAddresses[N].second.getPointer(CGF),
837                                      OrigAddresses[N].first.getPointer(CGF));
838     Size = CGF.Builder.CreateNUWAdd(
839         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
840     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
841   } else {
842     SizeInChars =
843         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
844     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
845   }
846   Sizes.emplace_back(SizeInChars, Size);
847   CodeGenFunction::OpaqueValueMapping OpaqueMap(
848       CGF,
849       cast<OpaqueValueExpr>(
850           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
851       RValue::get(Size));
852   CGF.EmitVariablyModifiedType(PrivateType);
853 }
854 
855 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
856                                          llvm::Value *Size) {
857   QualType PrivateType = getPrivateType(N);
858   if (!PrivateType->isVariablyModifiedType()) {
859     assert(!Size && !Sizes[N].second &&
860            "Size should be nullptr for non-variably modified reduction "
861            "items.");
862     return;
863   }
864   CodeGenFunction::OpaqueValueMapping OpaqueMap(
865       CGF,
866       cast<OpaqueValueExpr>(
867           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
868       RValue::get(Size));
869   CGF.EmitVariablyModifiedType(PrivateType);
870 }
871 
872 void ReductionCodeGen::emitInitialization(
873     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
874     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
875   assert(SharedAddresses.size() > N && "No variable was generated");
876   const auto *PrivateVD =
877       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
878   const OMPDeclareReductionDecl *DRD =
879       getReductionInit(ClausesData[N].ReductionOp);
880   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
881     if (DRD && DRD->getInitializer())
882       (void)DefaultInit(CGF);
883     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
884   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
885     (void)DefaultInit(CGF);
886     QualType SharedType = SharedAddresses[N].first.getType();
887     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
888                                      PrivateAddr, SharedAddr, SharedType);
889   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
890              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
891     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
892                          PrivateVD->getType().getQualifiers(),
893                          /*IsInitializer=*/false);
894   }
895 }
896 
897 bool ReductionCodeGen::needCleanups(unsigned N) {
898   QualType PrivateType = getPrivateType(N);
899   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
900   return DTorKind != QualType::DK_none;
901 }
902 
903 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
904                                     Address PrivateAddr) {
905   QualType PrivateType = getPrivateType(N);
906   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
907   if (needCleanups(N)) {
908     PrivateAddr =
909         PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
910     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
911   }
912 }
913 
914 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
915                           LValue BaseLV) {
916   BaseTy = BaseTy.getNonReferenceType();
917   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
918          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
919     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
920       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
921     } else {
922       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
923       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
924     }
925     BaseTy = BaseTy->getPointeeType();
926   }
927   return CGF.MakeAddrLValue(
928       BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
929       BaseLV.getType(), BaseLV.getBaseInfo(),
930       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
931 }
932 
933 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
934                           Address OriginalBaseAddress, llvm::Value *Addr) {
935   Address Tmp = Address::invalid();
936   Address TopTmp = Address::invalid();
937   Address MostTopTmp = Address::invalid();
938   BaseTy = BaseTy.getNonReferenceType();
939   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
940          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
941     Tmp = CGF.CreateMemTemp(BaseTy);
942     if (TopTmp.isValid())
943       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
944     else
945       MostTopTmp = Tmp;
946     TopTmp = Tmp;
947     BaseTy = BaseTy->getPointeeType();
948   }
949 
950   if (Tmp.isValid()) {
951     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
952         Addr, Tmp.getElementType());
953     CGF.Builder.CreateStore(Addr, Tmp);
954     return MostTopTmp;
955   }
956 
957   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
958       Addr, OriginalBaseAddress.getType());
959   return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
960 }
961 
962 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
963   const VarDecl *OrigVD = nullptr;
964   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
965     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
966     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
967       Base = TempOASE->getBase()->IgnoreParenImpCasts();
968     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
969       Base = TempASE->getBase()->IgnoreParenImpCasts();
970     DE = cast<DeclRefExpr>(Base);
971     OrigVD = cast<VarDecl>(DE->getDecl());
972   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
973     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   }
979   return OrigVD;
980 }
981 
982 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
983                                                Address PrivateAddr) {
984   const DeclRefExpr *DE;
985   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
986     BaseDecls.emplace_back(OrigVD);
987     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
988     LValue BaseLValue =
989         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
990                     OriginalBaseLValue);
991     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
992     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
993         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
994         SharedAddr.getPointer());
995     llvm::Value *PrivatePointer =
996         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
997             PrivateAddr.getPointer(), SharedAddr.getType());
998     llvm::Value *Ptr = CGF.Builder.CreateGEP(
999         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1000     return castToBase(CGF, OrigVD->getType(),
1001                       SharedAddresses[N].first.getType(),
1002                       OriginalBaseLValue.getAddress(CGF), Ptr);
1003   }
1004   BaseDecls.emplace_back(
1005       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1006   return PrivateAddr;
1007 }
1008 
1009 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1010   const OMPDeclareReductionDecl *DRD =
1011       getReductionInit(ClausesData[N].ReductionOp);
1012   return DRD && DRD->getInitializer();
1013 }
1014 
1015 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1016   return CGF.EmitLoadOfPointerLValue(
1017       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1018       getThreadIDVariable()->getType()->castAs<PointerType>());
1019 }
1020 
1021 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1022   if (!CGF.HaveInsertPoint())
1023     return;
1024   // 1.2.2 OpenMP Language Terminology
1025   // Structured block - An executable statement with a single entry at the
1026   // top and a single exit at the bottom.
1027   // The point of exit cannot be a branch out of the structured block.
1028   // longjmp() and throw() must not violate the entry/exit criteria.
1029   CGF.EHStack.pushTerminate();
1030   if (S)
1031     CGF.incrementProfileCounter(S);
1032   CodeGen(CGF);
1033   CGF.EHStack.popTerminate();
1034 }
1035 
1036 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1037     CodeGenFunction &CGF) {
1038   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1039                             getThreadIDVariable()->getType(),
1040                             AlignmentSource::Decl);
1041 }
1042 
1043 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1044                                        QualType FieldTy) {
1045   auto *Field = FieldDecl::Create(
1046       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1047       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1048       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1049   Field->setAccess(AS_public);
1050   DC->addDecl(Field);
1051   return Field;
1052 }
1053 
1054 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1055     : CGM(CGM), OMPBuilder(CGM.getModule()) {
1056   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1057   llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice,
1058                                      isGPU(), hasRequiresUnifiedSharedMemory(),
1059                                      CGM.getLangOpts().OpenMPOffloadMandatory);
1060   OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice
1061                             ? CGM.getLangOpts().OMPHostIRFile
1062                             : StringRef{});
1063   OMPBuilder.setConfig(Config);
1064 }
1065 
1066 void CGOpenMPRuntime::clear() {
1067   InternalVars.clear();
1068   // Clean non-target variable declarations possibly used only in debug info.
1069   for (const auto &Data : EmittedNonTargetVariables) {
1070     if (!Data.getValue().pointsToAliveValue())
1071       continue;
1072     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1073     if (!GV)
1074       continue;
1075     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1076       continue;
1077     GV->eraseFromParent();
1078   }
1079 }
1080 
1081 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1082   return OMPBuilder.createPlatformSpecificName(Parts);
1083 }
1084 
1085 static llvm::Function *
1086 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1087                           const Expr *CombinerInitializer, const VarDecl *In,
1088                           const VarDecl *Out, bool IsCombiner) {
1089   // void .omp_combiner.(Ty *in, Ty *out);
1090   ASTContext &C = CGM.getContext();
1091   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1092   FunctionArgList Args;
1093   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1094                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1095   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1096                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1097   Args.push_back(&OmpOutParm);
1098   Args.push_back(&OmpInParm);
1099   const CGFunctionInfo &FnInfo =
1100       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1101   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1102   std::string Name = CGM.getOpenMPRuntime().getName(
1103       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1104   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1105                                     Name, &CGM.getModule());
1106   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1107   if (CGM.getLangOpts().Optimize) {
1108     Fn->removeFnAttr(llvm::Attribute::NoInline);
1109     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1110     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1111   }
1112   CodeGenFunction CGF(CGM);
1113   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1114   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1115   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1116                     Out->getLocation());
1117   CodeGenFunction::OMPPrivateScope Scope(CGF);
1118   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1119   Scope.addPrivate(
1120       In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1121               .getAddress(CGF));
1122   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1123   Scope.addPrivate(
1124       Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1125                .getAddress(CGF));
1126   (void)Scope.Privatize();
1127   if (!IsCombiner && Out->hasInit() &&
1128       !CGF.isTrivialInitializer(Out->getInit())) {
1129     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1130                          Out->getType().getQualifiers(),
1131                          /*IsInitializer=*/true);
1132   }
1133   if (CombinerInitializer)
1134     CGF.EmitIgnoredExpr(CombinerInitializer);
1135   Scope.ForceCleanup();
1136   CGF.FinishFunction();
1137   return Fn;
1138 }
1139 
1140 void CGOpenMPRuntime::emitUserDefinedReduction(
1141     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1142   if (UDRMap.count(D) > 0)
1143     return;
1144   llvm::Function *Combiner = emitCombinerOrInitializer(
1145       CGM, D->getType(), D->getCombiner(),
1146       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1147       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1148       /*IsCombiner=*/true);
1149   llvm::Function *Initializer = nullptr;
1150   if (const Expr *Init = D->getInitializer()) {
1151     Initializer = emitCombinerOrInitializer(
1152         CGM, D->getType(),
1153         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1154                                                                      : nullptr,
1155         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1156         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1157         /*IsCombiner=*/false);
1158   }
1159   UDRMap.try_emplace(D, Combiner, Initializer);
1160   if (CGF) {
1161     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1162     Decls.second.push_back(D);
1163   }
1164 }
1165 
1166 std::pair<llvm::Function *, llvm::Function *>
1167 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1168   auto I = UDRMap.find(D);
1169   if (I != UDRMap.end())
1170     return I->second;
1171   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1172   return UDRMap.lookup(D);
1173 }
1174 
1175 namespace {
1176 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1177 // Builder if one is present.
1178 struct PushAndPopStackRAII {
1179   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1180                       bool HasCancel, llvm::omp::Directive Kind)
1181       : OMPBuilder(OMPBuilder) {
1182     if (!OMPBuilder)
1183       return;
1184 
1185     // The following callback is the crucial part of clangs cleanup process.
1186     //
1187     // NOTE:
1188     // Once the OpenMPIRBuilder is used to create parallel regions (and
1189     // similar), the cancellation destination (Dest below) is determined via
1190     // IP. That means if we have variables to finalize we split the block at IP,
1191     // use the new block (=BB) as destination to build a JumpDest (via
1192     // getJumpDestInCurrentScope(BB)) which then is fed to
1193     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1194     // to push & pop an FinalizationInfo object.
1195     // The FiniCB will still be needed but at the point where the
1196     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1197     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1198       assert(IP.getBlock()->end() == IP.getPoint() &&
1199              "Clang CG should cause non-terminated block!");
1200       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1201       CGF.Builder.restoreIP(IP);
1202       CodeGenFunction::JumpDest Dest =
1203           CGF.getOMPCancelDestination(OMPD_parallel);
1204       CGF.EmitBranchThroughCleanup(Dest);
1205     };
1206 
1207     // TODO: Remove this once we emit parallel regions through the
1208     //       OpenMPIRBuilder as it can do this setup internally.
1209     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1210     OMPBuilder->pushFinalizationCB(std::move(FI));
1211   }
1212   ~PushAndPopStackRAII() {
1213     if (OMPBuilder)
1214       OMPBuilder->popFinalizationCB();
1215   }
1216   llvm::OpenMPIRBuilder *OMPBuilder;
1217 };
1218 } // namespace
1219 
1220 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1221     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1222     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1223     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1224   assert(ThreadIDVar->getType()->isPointerType() &&
1225          "thread id variable must be of type kmp_int32 *");
1226   CodeGenFunction CGF(CGM, true);
1227   bool HasCancel = false;
1228   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1229     HasCancel = OPD->hasCancel();
1230   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1231     HasCancel = OPD->hasCancel();
1232   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1233     HasCancel = OPSD->hasCancel();
1234   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1235     HasCancel = OPFD->hasCancel();
1236   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1237     HasCancel = OPFD->hasCancel();
1238   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1239     HasCancel = OPFD->hasCancel();
1240   else if (const auto *OPFD =
1241                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1242     HasCancel = OPFD->hasCancel();
1243   else if (const auto *OPFD =
1244                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1245     HasCancel = OPFD->hasCancel();
1246 
1247   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1248   //       parallel region to make cancellation barriers work properly.
1249   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1250   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1251   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1252                                     HasCancel, OutlinedHelperName);
1253   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1254   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1255 }
1256 
1257 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1258   std::string Suffix = getName({"omp_outlined"});
1259   return (Name + Suffix).str();
1260 }
1261 
1262 std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1263   return getOutlinedHelperName(CGF.CurFn->getName());
1264 }
1265 
1266 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1267   std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1268   return (Name + Suffix).str();
1269 }
1270 
1271 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1272     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1273     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1274     const RegionCodeGenTy &CodeGen) {
1275   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1276   return emitParallelOrTeamsOutlinedFunction(
1277       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1278       CodeGen);
1279 }
1280 
1281 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1282     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1283     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1284     const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1288       CodeGen);
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1295     bool Tied, unsigned &NumberOfParts) {
1296   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1297                                               PrePostActionTy &) {
1298     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1299     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1300     llvm::Value *TaskArgs[] = {
1301         UpLoc, ThreadID,
1302         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1303                                     TaskTVar->getType()->castAs<PointerType>())
1304             .getPointer(CGF)};
1305     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1306                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1307                         TaskArgs);
1308   };
1309   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1310                                                             UntiedCodeGen);
1311   CodeGen.setAction(Action);
1312   assert(!ThreadIDVar->getType()->isPointerType() &&
1313          "thread id variable must be of type kmp_int32 for tasks");
1314   const OpenMPDirectiveKind Region =
1315       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1316                                                       : OMPD_task;
1317   const CapturedStmt *CS = D.getCapturedStmt(Region);
1318   bool HasCancel = false;
1319   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1320     HasCancel = TD->hasCancel();
1321   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1322     HasCancel = TD->hasCancel();
1323   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1324     HasCancel = TD->hasCancel();
1325   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1326     HasCancel = TD->hasCancel();
1327 
1328   CodeGenFunction CGF(CGM, true);
1329   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1330                                         InnermostKind, HasCancel, Action);
1331   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1332   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1333   if (!Tied)
1334     NumberOfParts = Action.getNumberOfParts();
1335   return Res;
1336 }
1337 
1338 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1339                                              bool AtCurrentPoint) {
1340   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1341   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1342 
1343   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1344   if (AtCurrentPoint) {
1345     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1346         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1347   } else {
1348     Elem.second.ServiceInsertPt =
1349         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1350     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1351   }
1352 }
1353 
1354 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1355   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1356   if (Elem.second.ServiceInsertPt) {
1357     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1358     Elem.second.ServiceInsertPt = nullptr;
1359     Ptr->eraseFromParent();
1360   }
1361 }
1362 
1363 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1364                                                   SourceLocation Loc,
1365                                                   SmallString<128> &Buffer) {
1366   llvm::raw_svector_ostream OS(Buffer);
1367   // Build debug location
1368   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1369   OS << ";" << PLoc.getFilename() << ";";
1370   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1371     OS << FD->getQualifiedNameAsString();
1372   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1373   return OS.str();
1374 }
1375 
1376 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1377                                                  SourceLocation Loc,
1378                                                  unsigned Flags, bool EmitLoc) {
1379   uint32_t SrcLocStrSize;
1380   llvm::Constant *SrcLocStr;
1381   if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1382                        llvm::codegenoptions::NoDebugInfo) ||
1383       Loc.isInvalid()) {
1384     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1385   } else {
1386     std::string FunctionName;
1387     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1388       FunctionName = FD->getQualifiedNameAsString();
1389     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1390     const char *FileName = PLoc.getFilename();
1391     unsigned Line = PLoc.getLine();
1392     unsigned Column = PLoc.getColumn();
1393     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1394                                                 Column, SrcLocStrSize);
1395   }
1396   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1397   return OMPBuilder.getOrCreateIdent(
1398       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1399 }
1400 
1401 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1402                                           SourceLocation Loc) {
1403   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1404   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1405   // the clang invariants used below might be broken.
1406   if (CGM.getLangOpts().OpenMPIRBuilder) {
1407     SmallString<128> Buffer;
1408     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1409     uint32_t SrcLocStrSize;
1410     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1411         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1412     return OMPBuilder.getOrCreateThreadID(
1413         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1414   }
1415 
1416   llvm::Value *ThreadID = nullptr;
1417   // Check whether we've already cached a load of the thread id in this
1418   // function.
1419   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1420   if (I != OpenMPLocThreadIDMap.end()) {
1421     ThreadID = I->second.ThreadID;
1422     if (ThreadID != nullptr)
1423       return ThreadID;
1424   }
1425   // If exceptions are enabled, do not use parameter to avoid possible crash.
1426   if (auto *OMPRegionInfo =
1427           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1428     if (OMPRegionInfo->getThreadIDVariable()) {
1429       // Check if this an outlined function with thread id passed as argument.
1430       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1431       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1432       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1433           !CGF.getLangOpts().CXXExceptions ||
1434           CGF.Builder.GetInsertBlock() == TopBlock ||
1435           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1436           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1437               TopBlock ||
1438           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1439               CGF.Builder.GetInsertBlock()) {
1440         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1441         // If value loaded in entry block, cache it and use it everywhere in
1442         // function.
1443         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1444           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1445           Elem.second.ThreadID = ThreadID;
1446         }
1447         return ThreadID;
1448       }
1449     }
1450   }
1451 
1452   // This is not an outlined function region - need to call __kmpc_int32
1453   // kmpc_global_thread_num(ident_t *loc).
1454   // Generate thread id value and cache this value for use across the
1455   // function.
1456   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1457   if (!Elem.second.ServiceInsertPt)
1458     setLocThreadIdInsertPt(CGF);
1459   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1460   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1461   llvm::CallInst *Call = CGF.Builder.CreateCall(
1462       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1463                                             OMPRTL___kmpc_global_thread_num),
1464       emitUpdateLocation(CGF, Loc));
1465   Call->setCallingConv(CGF.getRuntimeCC());
1466   Elem.second.ThreadID = Call;
1467   return Call;
1468 }
1469 
1470 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1471   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1472   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1473     clearLocThreadIdInsertPt(CGF);
1474     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1475   }
1476   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1477     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1478       UDRMap.erase(D);
1479     FunctionUDRMap.erase(CGF.CurFn);
1480   }
1481   auto I = FunctionUDMMap.find(CGF.CurFn);
1482   if (I != FunctionUDMMap.end()) {
1483     for(const auto *D : I->second)
1484       UDMMap.erase(D);
1485     FunctionUDMMap.erase(I);
1486   }
1487   LastprivateConditionalToTypes.erase(CGF.CurFn);
1488   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1489 }
1490 
1491 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1492   return OMPBuilder.IdentPtr;
1493 }
1494 
1495 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1496   if (!Kmpc_MicroTy) {
1497     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1498     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1499                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1500     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1501   }
1502   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1503 }
1504 
1505 llvm::FunctionCallee
1506 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1507                                              bool IsGPUDistribute) {
1508   assert((IVSize == 32 || IVSize == 64) &&
1509          "IV size is not compatible with the omp runtime");
1510   StringRef Name;
1511   if (IsGPUDistribute)
1512     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1513                                     : "__kmpc_distribute_static_init_4u")
1514                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1515                                     : "__kmpc_distribute_static_init_8u");
1516   else
1517     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1518                                     : "__kmpc_for_static_init_4u")
1519                         : (IVSigned ? "__kmpc_for_static_init_8"
1520                                     : "__kmpc_for_static_init_8u");
1521 
1522   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1523   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1524   llvm::Type *TypeParams[] = {
1525     getIdentTyPointerTy(),                     // loc
1526     CGM.Int32Ty,                               // tid
1527     CGM.Int32Ty,                               // schedtype
1528     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1529     PtrTy,                                     // p_lower
1530     PtrTy,                                     // p_upper
1531     PtrTy,                                     // p_stride
1532     ITy,                                       // incr
1533     ITy                                        // chunk
1534   };
1535   auto *FnTy =
1536       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1537   return CGM.CreateRuntimeFunction(FnTy, Name);
1538 }
1539 
1540 llvm::FunctionCallee
1541 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1542   assert((IVSize == 32 || IVSize == 64) &&
1543          "IV size is not compatible with the omp runtime");
1544   StringRef Name =
1545       IVSize == 32
1546           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1547           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1548   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1549   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1550                                CGM.Int32Ty,           // tid
1551                                CGM.Int32Ty,           // schedtype
1552                                ITy,                   // lower
1553                                ITy,                   // upper
1554                                ITy,                   // stride
1555                                ITy                    // chunk
1556   };
1557   auto *FnTy =
1558       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1559   return CGM.CreateRuntimeFunction(FnTy, Name);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1564   assert((IVSize == 32 || IVSize == 64) &&
1565          "IV size is not compatible with the omp runtime");
1566   StringRef Name =
1567       IVSize == 32
1568           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1569           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1570   llvm::Type *TypeParams[] = {
1571       getIdentTyPointerTy(), // loc
1572       CGM.Int32Ty,           // tid
1573   };
1574   auto *FnTy =
1575       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1576   return CGM.CreateRuntimeFunction(FnTy, Name);
1577 }
1578 
1579 llvm::FunctionCallee
1580 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1581   assert((IVSize == 32 || IVSize == 64) &&
1582          "IV size is not compatible with the omp runtime");
1583   StringRef Name =
1584       IVSize == 32
1585           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1586           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1587   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1588   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1589   llvm::Type *TypeParams[] = {
1590     getIdentTyPointerTy(),                     // loc
1591     CGM.Int32Ty,                               // tid
1592     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1593     PtrTy,                                     // p_lower
1594     PtrTy,                                     // p_upper
1595     PtrTy                                      // p_stride
1596   };
1597   auto *FnTy =
1598       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1599   return CGM.CreateRuntimeFunction(FnTy, Name);
1600 }
1601 
1602 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1603 convertDeviceClause(const VarDecl *VD) {
1604   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1605       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1606   if (!DevTy)
1607     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1608 
1609   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1610   case OMPDeclareTargetDeclAttr::DT_Host:
1611     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1612     break;
1613   case OMPDeclareTargetDeclAttr::DT_NoHost:
1614     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1615     break;
1616   case OMPDeclareTargetDeclAttr::DT_Any:
1617     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1618     break;
1619   default:
1620     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1621     break;
1622   }
1623 }
1624 
1625 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1626 convertCaptureClause(const VarDecl *VD) {
1627   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1628       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1629   if (!MapType)
1630     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1631   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1632   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1633     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1634     break;
1635   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1636     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1637     break;
1638   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1639     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1640     break;
1641   default:
1642     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1643     break;
1644   }
1645 }
1646 
1647 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1648     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1649     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1650 
1651   auto FileInfoCallBack = [&]() {
1652     SourceManager &SM = CGM.getContext().getSourceManager();
1653     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1654 
1655     llvm::sys::fs::UniqueID ID;
1656     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1657       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1658     }
1659 
1660     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1661   };
1662 
1663   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1664 }
1665 
1666 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1667   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1668 
1669   auto LinkageForVariable = [&VD, this]() {
1670     return CGM.getLLVMLinkageVarDefinition(VD);
1671   };
1672 
1673   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1674 
1675   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1676       CGM.getContext().getPointerType(VD->getType()));
1677   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1678       convertCaptureClause(VD), convertDeviceClause(VD),
1679       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1680       VD->isExternallyVisible(),
1681       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1682                                   VD->getCanonicalDecl()->getBeginLoc()),
1683       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1684       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1685       LinkageForVariable);
1686 
1687   if (!addr)
1688     return Address::invalid();
1689   return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1690 }
1691 
1692 llvm::Constant *
1693 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1694   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1695          !CGM.getContext().getTargetInfo().isTLSSupported());
1696   // Lookup the entry, lazily creating it if necessary.
1697   std::string Suffix = getName({"cache", ""});
1698   return OMPBuilder.getOrCreateInternalVariable(
1699       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1700 }
1701 
1702 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1703                                                 const VarDecl *VD,
1704                                                 Address VDAddr,
1705                                                 SourceLocation Loc) {
1706   if (CGM.getLangOpts().OpenMPUseTLS &&
1707       CGM.getContext().getTargetInfo().isTLSSupported())
1708     return VDAddr;
1709 
1710   llvm::Type *VarTy = VDAddr.getElementType();
1711   llvm::Value *Args[] = {
1712       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1713       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1714       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1715       getOrCreateThreadPrivateCache(VD)};
1716   return Address(
1717       CGF.EmitRuntimeCall(
1718           OMPBuilder.getOrCreateRuntimeFunction(
1719               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1720           Args),
1721       CGF.Int8Ty, VDAddr.getAlignment());
1722 }
1723 
1724 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1725     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1726     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1727   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1728   // library.
1729   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1730   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1731                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1732                       OMPLoc);
1733   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1734   // to register constructor/destructor for variable.
1735   llvm::Value *Args[] = {
1736       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1737       Ctor, CopyCtor, Dtor};
1738   CGF.EmitRuntimeCall(
1739       OMPBuilder.getOrCreateRuntimeFunction(
1740           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1741       Args);
1742 }
1743 
/// Emit the helper functions and the runtime registration needed for the
/// threadprivate variable \p VD when native TLS is not used.
///
/// Up to two helpers are generated: a constructor that re-emits VD's
/// initializer into the copy passed as its argument (C++ with \p PerformInit
/// only) and a destructor (only if VD's type needs destruction). They are
/// registered through emitThreadPrivateVarInit().
///
/// \param VD Threadprivate variable; its definition is looked up first.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit Whether the constructor helper is to be generated.
/// \param CGF If non-null, the registration calls are emitted into this
///        function; if null, a stand-alone init function is created.
/// \returns The stand-alone init function when \p CGF is null and a helper
///          was generated; nullptr in every other case.
1744 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1745     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1746     bool PerformInit, CodeGenFunction *CGF) {
       // Nothing to do when the variable lives in native TLS.
1747   if (CGM.getLangOpts().OpenMPUseTLS &&
1748       CGM.getContext().getTargetInfo().isTLSSupported())
1749     return nullptr;
1750 
1751   VD = VD->getDefinition(CGM.getContext());
       // Proceed only if a definition exists and its mangled name has not been
       // recorded in ThreadPrivateWithDefinition before.
1752   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1753     QualType ASTTy = VD->getType();
1754 
1755     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1756     const Expr *Init = VD->getAnyInitializer();
         // NOTE(review): Init is dereferenced below without a null check;
         // presumably PerformInit implies an initializer exists - confirm
         // against callers.
1757     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1758       // Generate function that re-emits the declaration's initializer into the
1759       // threadprivate copy of the variable VD
1760       CodeGenFunction CtorCGF(CGM);
1761       FunctionArgList Args;
1762       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1763                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1764                             ImplicitParamDecl::Other);
1765       Args.push_back(&Dst);
1766 
           // The helper takes a void* and returns a void*.
1767       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1768           CGM.getContext().VoidPtrTy, Args);
1769       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1770       std::string Name = getName({"__kmpc_global_ctor_", ""});
1771       llvm::Function *Fn =
1772           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1773       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1774                             Args, Loc, Loc);
           // Initialize the object the argument points to.
1775       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1776           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1777           CGM.getContext().VoidPtrTy, Dst.getLocation());
1778       Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1779                   VDAddr.getAlignment());
1780       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1781                                /*IsInitializer=*/true);
           // Reload the argument and hand it back as the helper's result.
1782       ArgVal = CtorCGF.EmitLoadOfScalar(
1783           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1784           CGM.getContext().VoidPtrTy, Dst.getLocation());
1785       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1786       CtorCGF.FinishFunction();
1787       Ctor = Fn;
1788     }
1789     if (VD->getType().isDestructedType() != QualType::DK_none) {
1790       // Generate function that emits destructor call for the threadprivate copy
1791       // of the variable VD
1792       CodeGenFunction DtorCGF(CGM);
1793       FunctionArgList Args;
1794       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1795                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1796                             ImplicitParamDecl::Other);
1797       Args.push_back(&Dst);
1798 
           // The helper takes a void* and returns void.
1799       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1800           CGM.getContext().VoidTy, Args);
1801       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1802       std::string Name = getName({"__kmpc_global_dtor_", ""});
1803       llvm::Function *Fn =
1804           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1805       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1806       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1807                             Loc, Loc);
1808       // Create a scope with an artificial location for the body of this function.
1809       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1810       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1811           DtorCGF.GetAddrOfLocalVar(&Dst),
1812           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1813       DtorCGF.emitDestroy(
1814           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1815           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1816           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1817       DtorCGF.FinishFunction();
1818       Dtor = Fn;
1819     }
1820     // Do not emit init function if it is not required.
1821     if (!Ctor && !Dtor)
1822       return nullptr;
1823 
1824     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1825     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1826                                                /*isVarArg=*/false)
1827                            ->getPointerTo();
1828     // Copying constructor for the threadprivate variable.
1829     // Must be NULL - reserved by runtime, but currently it requires that this
1830     // parameter is always NULL. Otherwise it fires assertion.
1831     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
         // A helper that was not generated is passed as a typed null pointer.
1832     if (Ctor == nullptr) {
1833       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1834                                              /*isVarArg=*/false)
1835                          ->getPointerTo();
1836       Ctor = llvm::Constant::getNullValue(CtorTy);
1837     }
1838     if (Dtor == nullptr) {
1839       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1840                                              /*isVarArg=*/false)
1841                          ->getPointerTo();
1842       Dtor = llvm::Constant::getNullValue(DtorTy);
1843     }
         // Without an enclosing function, wrap the registration in a new
         // nullary init function and return that function to the caller.
1844     if (!CGF) {
1845       auto *InitFunctionTy =
1846           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1847       std::string Name = getName({"__omp_threadprivate_init_", ""});
1848       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1849           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1850       CodeGenFunction InitCGF(CGM);
1851       FunctionArgList ArgList;
1852       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1853                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1854                             Loc, Loc);
1855       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1856       InitCGF.FinishFunction();
1857       return InitFunction;
1858     }
         // Otherwise emit the registration directly into the given function.
1859     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1860   }
1861   return nullptr;
1862 }
1863 
/// Emit and register the offload constructor/destructor entries for the
/// declare target variable \p VD.
///
/// When compiling for the target device, real nullary functions named
/// "<entry-name>_ctor" / "<entry-name>_dtor" are generated that initialize or
/// destroy the object at \p Addr. Otherwise, private constant i8 globals with
/// the same names are created and registered in their place.
///
/// \param VD Variable to process; its definition is looked up and must exist
///        once the early-exit checks have passed.
/// \param Addr Global variable that is initialized/destroyed by the generated
///        functions.
/// \param PerformInit Whether a constructor entry is to be produced (C++
///        only).
/// \returns false when there are no target triples and this is not a target
///          device compilation; otherwise the value of the
///          OpenMPIsTargetDevice language option.
1864 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1865                                                      llvm::GlobalVariable *Addr,
1866                                                      bool PerformInit) {
       // No offloading is involved at all.
1867   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1868       !CGM.getLangOpts().OpenMPIsTargetDevice)
1869     return false;
1870   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1871       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
       // Skip variables that are not declare target, are mapped with 'link',
       // or are mapped with 'to'/'enter' while unified shared memory is
       // required.
1872   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1873       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1874         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1875        HasRequiresUnifiedSharedMemory))
1876     return CGM.getLangOpts().OpenMPIsTargetDevice;
1877   VD = VD->getDefinition(CGM.getContext());
1878   assert(VD && "Unknown VarDecl");
1879 
       // Each mangled name is processed only once.
1880   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1881     return CGM.getLangOpts().OpenMPIsTargetDevice;
1882 
1883   QualType ASTTy = VD->getType();
1884   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1885 
1886   // Produce the unique prefix to identify the new target regions. We use
1887   // the source location of the variable declaration which we know to not
1888   // conflict with any target region.
1889   llvm::TargetRegionEntryInfo EntryInfo =
1890       getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
       // Buffer receives the entry function name; Out is scratch storage for
       // the suffixed parent names built below.
1891   SmallString<128> Buffer, Out;
1892   OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1893 
1894   const Expr *Init = VD->getAnyInitializer();
       // NOTE(review): Init is dereferenced in the device branch below without
       // a null check; presumably PerformInit implies an initializer exists -
       // confirm against callers.
1895   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1896     llvm::Constant *Ctor;
1897     llvm::Constant *ID;
1898     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1899       // Generate function that re-emits the declaration's initializer into
1900       // the global variable passed in as Addr.
1901       CodeGenFunction CtorCGF(CGM);
1902 
1903       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1904       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1905       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1906           FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1907           llvm::GlobalValue::WeakODRLinkage);
1908       Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1909       if (CGM.getTriple().isAMDGCN())
1910         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1911       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1912       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1913                             FunctionArgList(), Loc, Loc);
1914       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
           // Access the global through a pointer in address space 0.
1915       llvm::Constant *AddrInAS0 = Addr;
1916       if (Addr->getAddressSpace() != 0)
1917         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1918             Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1919       CtorCGF.EmitAnyExprToMem(Init,
1920                                Address(AddrInAS0, Addr->getValueType(),
1921                                        CGM.getContext().getDeclAlign(VD)),
1922                                Init->getType().getQualifiers(),
1923                                /*IsInitializer=*/true);
1924       CtorCGF.FinishFunction();
1925       Ctor = Fn;
1926       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1927     } else {
           // Not a device compilation: a private constant i8 global with the
           // constructor's name is registered instead of a function.
1928       Ctor = new llvm::GlobalVariable(
1929           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1930           llvm::GlobalValue::PrivateLinkage,
1931           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1932       ID = Ctor;
1933     }
1934 
1935     // Register the information for the entry associated with the constructor.
1936     Out.clear();
1937     auto CtorEntryInfo = EntryInfo;
1938     CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1939     OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1940         CtorEntryInfo, Ctor, ID,
1941         llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1942   }
1943   if (VD->getType().isDestructedType() != QualType::DK_none) {
1944     llvm::Constant *Dtor;
1945     llvm::Constant *ID;
1946     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1947       // Generate function that emits the destructor call for the global
1948       // variable passed in as Addr.
1949       CodeGenFunction DtorCGF(CGM);
1950 
1951       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1952       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1953       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1954           FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1955           llvm::GlobalValue::WeakODRLinkage);
1956       Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1957       if (CGM.getTriple().isAMDGCN())
1958         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1959       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1960       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1961                             FunctionArgList(), Loc, Loc);
1962       // Create a scope with an artificial location for the body of this
1963       // function.
1964       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
           // Access the global through a pointer in address space 0.
1965       llvm::Constant *AddrInAS0 = Addr;
1966       if (Addr->getAddressSpace() != 0)
1967         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1968             Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1969       DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1970                                   CGM.getContext().getDeclAlign(VD)),
1971                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1972                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1973       DtorCGF.FinishFunction();
1974       Dtor = Fn;
1975       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1976     } else {
           // Not a device compilation: a private constant i8 global with the
           // destructor's name is registered instead of a function.
1977       Dtor = new llvm::GlobalVariable(
1978           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1979           llvm::GlobalValue::PrivateLinkage,
1980           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1981       ID = Dtor;
1982     }
1983     // Register the information for the entry associated with the destructor.
1984     Out.clear();
1985     auto DtorEntryInfo = EntryInfo;
1986     DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1987     OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1988         DtorEntryInfo, Dtor, ID,
1989         llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1990   }
1991   return CGM.getLangOpts().OpenMPIsTargetDevice;
1992 }
1993 
1994 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1995                                                           QualType VarType,
1996                                                           StringRef Name) {
1997   std::string Suffix = getName({"artificial", ""});
1998   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1999   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
2000       VarLVType, Twine(Name).concat(Suffix).str());
2001   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2002       CGM.getTarget().isTLSSupported()) {
2003     GAddr->setThreadLocal(/*Val=*/true);
2004     return Address(GAddr, GAddr->getValueType(),
2005                    CGM.getContext().getTypeAlignInChars(VarType));
2006   }
2007   std::string CacheSuffix = getName({"cache", ""});
2008   llvm::Value *Args[] = {
2009       emitUpdateLocation(CGF, SourceLocation()),
2010       getThreadID(CGF, SourceLocation()),
2011       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2012       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2013                                 /*isSigned=*/false),
2014       OMPBuilder.getOrCreateInternalVariable(
2015           CGM.VoidPtrPtrTy,
2016           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
2017   return Address(
2018       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2019           CGF.EmitRuntimeCall(
2020               OMPBuilder.getOrCreateRuntimeFunction(
2021                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2022               Args),
2023           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2024       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2025 }
2026 
2027 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2028                                    const RegionCodeGenTy &ThenGen,
2029                                    const RegionCodeGenTy &ElseGen) {
2030   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2031 
2032   // If the condition constant folds and can be elided, try to avoid emitting
2033   // the condition and the dead arm of the if/else.
2034   bool CondConstant;
2035   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2036     if (CondConstant)
2037       ThenGen(CGF);
2038     else
2039       ElseGen(CGF);
2040     return;
2041   }
2042 
2043   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2044   // emit the conditional branch.
2045   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2046   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2047   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2048   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2049 
2050   // Emit the 'then' code.
2051   CGF.EmitBlock(ThenBlock);
2052   ThenGen(CGF);
2053   CGF.EmitBranch(ContBlock);
2054   // Emit the 'else' code if present.
2055   // There is no need to emit line number for unconditional branch.
2056   (void)ApplyDebugLocation::CreateEmpty(CGF);
2057   CGF.EmitBlock(ElseBlock);
2058   ElseGen(CGF);
2059   // There is no need to emit line number for unconditional branch.
2060   (void)ApplyDebugLocation::CreateEmpty(CGF);
2061   CGF.EmitBranch(ContBlock);
2062   // Emit the continuation block for code after the if.
2063   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2064 }
2065 
2066 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2067                                        llvm::Function *OutlinedFn,
2068                                        ArrayRef<llvm::Value *> CapturedVars,
2069                                        const Expr *IfCond,
2070                                        llvm::Value *NumThreads) {
2071   if (!CGF.HaveInsertPoint())
2072     return;
2073   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2074   auto &M = CGM.getModule();
2075   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2076                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2077     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2078     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2079     llvm::Value *Args[] = {
2080         RTLoc,
2081         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2082         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2083     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2084     RealArgs.append(std::begin(Args), std::end(Args));
2085     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2086 
2087     llvm::FunctionCallee RTLFn =
2088         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2089     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2090   };
2091   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2092                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2093     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2094     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2095     // Build calls:
2096     // __kmpc_serialized_parallel(&Loc, GTid);
2097     llvm::Value *Args[] = {RTLoc, ThreadID};
2098     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099                             M, OMPRTL___kmpc_serialized_parallel),
2100                         Args);
2101 
2102     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2103     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2104     Address ZeroAddrBound =
2105         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2106                                          /*Name=*/".bound.zero.addr");
2107     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2108     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2109     // ThreadId for serialized parallels is 0.
2110     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2111     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2112     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2113 
2114     // Ensure we do not inline the function. This is trivially true for the ones
2115     // passed to __kmpc_fork_call but the ones called in serialized regions
2116     // could be inlined. This is not a perfect but it is closer to the invariant
2117     // we want, namely, every data environment starts with a new function.
2118     // TODO: We should pass the if condition to the runtime function and do the
2119     //       handling there. Much cleaner code.
2120     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2121     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2122     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2123 
2124     // __kmpc_end_serialized_parallel(&Loc, GTid);
2125     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2126     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2127                             M, OMPRTL___kmpc_end_serialized_parallel),
2128                         EndArgs);
2129   };
2130   if (IfCond) {
2131     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2132   } else {
2133     RegionCodeGenTy ThenRCG(ThenGen);
2134     ThenRCG(CGF);
2135   }
2136 }
2137 
2138 // If we're inside an (outlined) parallel region, use the region info's
2139 // thread-ID variable (it is passed in a first argument of the outlined function
2140 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2141 // regular serial code region, get thread ID by calling kmp_int32
2142 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2143 // return the address of that temp.
2144 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2145                                              SourceLocation Loc) {
2146   if (auto *OMPRegionInfo =
2147           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2148     if (OMPRegionInfo->getThreadIDVariable())
2149       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2150 
2151   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2152   QualType Int32Ty =
2153       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2154   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2155   CGF.EmitStoreOfScalar(ThreadID,
2156                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2157 
2158   return ThreadIDTemp;
2159 }
2160 
2161 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2162   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2163   std::string Name = getName({Prefix, "var"});
2164   llvm::GlobalVariable *G = OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2165   llvm::Align PtrAlign = OMPBuilder.M.getDataLayout().getPointerABIAlignment(G->getAddressSpace());
2166   if (PtrAlign > llvm::Align(G->getAlignment()))
2167     G->setAlignment(PtrAlign);
2168   return G;
2169 }
2170 
2171 namespace {
2172 /// Common pre(post)-action for different OpenMP constructs.
2173 class CommonActionTy final : public PrePostActionTy {
2174   llvm::FunctionCallee EnterCallee;
2175   ArrayRef<llvm::Value *> EnterArgs;
2176   llvm::FunctionCallee ExitCallee;
2177   ArrayRef<llvm::Value *> ExitArgs;
2178   bool Conditional;
2179   llvm::BasicBlock *ContBlock = nullptr;
2180 
2181 public:
2182   CommonActionTy(llvm::FunctionCallee EnterCallee,
2183                  ArrayRef<llvm::Value *> EnterArgs,
2184                  llvm::FunctionCallee ExitCallee,
2185                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2186       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2187         ExitArgs(ExitArgs), Conditional(Conditional) {}
2188   void Enter(CodeGenFunction &CGF) override {
2189     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2190     if (Conditional) {
2191       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2192       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2193       ContBlock = CGF.createBasicBlock("omp_if.end");
2194       // Generate the branch (If-stmt)
2195       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2196       CGF.EmitBlock(ThenBlock);
2197     }
2198   }
2199   void Done(CodeGenFunction &CGF) {
2200     // Emit the rest of blocks/branches
2201     CGF.EmitBranch(ContBlock);
2202     CGF.EmitBlock(ContBlock, true);
2203   }
2204   void Exit(CodeGenFunction &CGF) override {
2205     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2206   }
2207 };
2208 } // anonymous namespace
2209 
2210 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2211                                          StringRef CriticalName,
2212                                          const RegionCodeGenTy &CriticalOpGen,
2213                                          SourceLocation Loc, const Expr *Hint) {
2214   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2215   // CriticalOpGen();
2216   // __kmpc_end_critical(ident_t *, gtid, Lock);
2217   // Prepare arguments and build a call to __kmpc_critical
2218   if (!CGF.HaveInsertPoint())
2219     return;
2220   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2221                          getCriticalRegionLock(CriticalName)};
2222   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2223                                                 std::end(Args));
2224   if (Hint) {
2225     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2226         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2227   }
2228   CommonActionTy Action(
2229       OMPBuilder.getOrCreateRuntimeFunction(
2230           CGM.getModule(),
2231           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2232       EnterArgs,
2233       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2234                                             OMPRTL___kmpc_end_critical),
2235       Args);
2236   CriticalOpGen.setAction(Action);
2237   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2238 }
2239 
2240 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2241                                        const RegionCodeGenTy &MasterOpGen,
2242                                        SourceLocation Loc) {
2243   if (!CGF.HaveInsertPoint())
2244     return;
2245   // if(__kmpc_master(ident_t *, gtid)) {
2246   //   MasterOpGen();
2247   //   __kmpc_end_master(ident_t *, gtid);
2248   // }
2249   // Prepare arguments and build a call to __kmpc_master
2250   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2251   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2252                             CGM.getModule(), OMPRTL___kmpc_master),
2253                         Args,
2254                         OMPBuilder.getOrCreateRuntimeFunction(
2255                             CGM.getModule(), OMPRTL___kmpc_end_master),
2256                         Args,
2257                         /*Conditional=*/true);
2258   MasterOpGen.setAction(Action);
2259   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2260   Action.Done(CGF);
2261 }
2262 
2263 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2264                                        const RegionCodeGenTy &MaskedOpGen,
2265                                        SourceLocation Loc, const Expr *Filter) {
2266   if (!CGF.HaveInsertPoint())
2267     return;
2268   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2269   //   MaskedOpGen();
2270   //   __kmpc_end_masked(iden_t *, gtid);
2271   // }
2272   // Prepare arguments and build a call to __kmpc_masked
2273   llvm::Value *FilterVal = Filter
2274                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2275                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2276   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2277                          FilterVal};
2278   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2279                             getThreadID(CGF, Loc)};
2280   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2281                             CGM.getModule(), OMPRTL___kmpc_masked),
2282                         Args,
2283                         OMPBuilder.getOrCreateRuntimeFunction(
2284                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2285                         ArgsEnd,
2286                         /*Conditional=*/true);
2287   MaskedOpGen.setAction(Action);
2288   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2289   Action.Done(CGF);
2290 }
2291 
2292 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2293                                         SourceLocation Loc) {
2294   if (!CGF.HaveInsertPoint())
2295     return;
2296   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2297     OMPBuilder.createTaskyield(CGF.Builder);
2298   } else {
2299     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2300     llvm::Value *Args[] = {
2301         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2302         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2303     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2304                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2305                         Args);
2306   }
2307 
2308   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2309     Region->emitUntiedSwitch(CGF);
2310 }
2311 
2312 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2313                                           const RegionCodeGenTy &TaskgroupOpGen,
2314                                           SourceLocation Loc) {
2315   if (!CGF.HaveInsertPoint())
2316     return;
2317   // __kmpc_taskgroup(ident_t *, gtid);
2318   // TaskgroupOpGen();
2319   // __kmpc_end_taskgroup(ident_t *, gtid);
2320   // Prepare arguments and build a call to __kmpc_taskgroup
2321   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2322   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2323                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2324                         Args,
2325                         OMPBuilder.getOrCreateRuntimeFunction(
2326                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2327                         Args);
2328   TaskgroupOpGen.setAction(Action);
2329   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2330 }
2331 
2332 /// Given an array of pointers to variables, project the address of a
2333 /// given variable.
2334 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2335                                       unsigned Index, const VarDecl *Var) {
2336   // Pull out the pointer to the variable.
2337   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2338   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2339 
2340   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2341   return Address(
2342       CGF.Builder.CreateBitCast(
2343           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2344       ElemTy, CGF.getContext().getDeclAlign(Var));
2345 }
2346 
2347 static llvm::Value *emitCopyprivateCopyFunction(
2348     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2349     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2350     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2351     SourceLocation Loc) {
2352   ASTContext &C = CGM.getContext();
2353   // void copy_func(void *LHSArg, void *RHSArg);
2354   FunctionArgList Args;
2355   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2356                            ImplicitParamDecl::Other);
2357   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2358                            ImplicitParamDecl::Other);
2359   Args.push_back(&LHSArg);
2360   Args.push_back(&RHSArg);
2361   const auto &CGFI =
2362       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2363   std::string Name =
2364       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2365   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2366                                     llvm::GlobalValue::InternalLinkage, Name,
2367                                     &CGM.getModule());
2368   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2369   Fn->setDoesNotRecurse();
2370   CodeGenFunction CGF(CGM);
2371   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2372   // Dest = (void*[n])(LHSArg);
2373   // Src = (void*[n])(RHSArg);
2374   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2375                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2376                   ArgsElemType->getPointerTo()),
2377               ArgsElemType, CGF.getPointerAlign());
2378   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2379                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2380                   ArgsElemType->getPointerTo()),
2381               ArgsElemType, CGF.getPointerAlign());
2382   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2383   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2384   // ...
2385   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2386   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2387     const auto *DestVar =
2388         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2389     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2390 
2391     const auto *SrcVar =
2392         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2393     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2394 
2395     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2396     QualType Type = VD->getType();
2397     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2398   }
2399   CGF.FinishFunction();
2400   return Fn;
2401 }
2402 
2403 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2404                                        const RegionCodeGenTy &SingleOpGen,
2405                                        SourceLocation Loc,
2406                                        ArrayRef<const Expr *> CopyprivateVars,
2407                                        ArrayRef<const Expr *> SrcExprs,
2408                                        ArrayRef<const Expr *> DstExprs,
2409                                        ArrayRef<const Expr *> AssignmentOps) {
2410   if (!CGF.HaveInsertPoint())
2411     return;
2412   assert(CopyprivateVars.size() == SrcExprs.size() &&
2413          CopyprivateVars.size() == DstExprs.size() &&
2414          CopyprivateVars.size() == AssignmentOps.size());
2415   ASTContext &C = CGM.getContext();
2416   // int32 did_it = 0;
2417   // if(__kmpc_single(ident_t *, gtid)) {
2418   //   SingleOpGen();
2419   //   __kmpc_end_single(ident_t *, gtid);
2420   //   did_it = 1;
2421   // }
2422   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2423   // <copy_func>, did_it);
2424 
2425   Address DidIt = Address::invalid();
2426   if (!CopyprivateVars.empty()) {
2427     // int32 did_it = 0;
2428     QualType KmpInt32Ty =
2429         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2430     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2431     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2432   }
2433   // Prepare arguments and build a call to __kmpc_single
2434   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2435   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2436                             CGM.getModule(), OMPRTL___kmpc_single),
2437                         Args,
2438                         OMPBuilder.getOrCreateRuntimeFunction(
2439                             CGM.getModule(), OMPRTL___kmpc_end_single),
2440                         Args,
2441                         /*Conditional=*/true);
2442   SingleOpGen.setAction(Action);
2443   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2444   if (DidIt.isValid()) {
2445     // did_it = 1;
2446     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2447   }
2448   Action.Done(CGF);
2449   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2450   // <copy_func>, did_it);
2451   if (DidIt.isValid()) {
2452     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2453     QualType CopyprivateArrayTy = C.getConstantArrayType(
2454         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2455         /*IndexTypeQuals=*/0);
2456     // Create a list of all private variables for copyprivate.
2457     Address CopyprivateList =
2458         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2459     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2460       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2461       CGF.Builder.CreateStore(
2462           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2463               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2464               CGF.VoidPtrTy),
2465           Elem);
2466     }
2467     // Build function that copies private values from single region to all other
2468     // threads in the corresponding parallel region.
2469     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2470         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2471         SrcExprs, DstExprs, AssignmentOps, Loc);
2472     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2473     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2474         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2475     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2476     llvm::Value *Args[] = {
2477         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2478         getThreadID(CGF, Loc),        // i32 <gtid>
2479         BufSize,                      // size_t <buf_size>
2480         CL.getPointer(),              // void *<copyprivate list>
2481         CpyFn,                        // void (*) (void *, void *) <copy_func>
2482         DidItVal                      // i32 did_it
2483     };
2484     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2485                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2486                         Args);
2487   }
2488 }
2489 
2490 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2491                                         const RegionCodeGenTy &OrderedOpGen,
2492                                         SourceLocation Loc, bool IsThreads) {
2493   if (!CGF.HaveInsertPoint())
2494     return;
2495   // __kmpc_ordered(ident_t *, gtid);
2496   // OrderedOpGen();
2497   // __kmpc_end_ordered(ident_t *, gtid);
2498   // Prepare arguments and build a call to __kmpc_ordered
2499   if (IsThreads) {
2500     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2501     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2502                               CGM.getModule(), OMPRTL___kmpc_ordered),
2503                           Args,
2504                           OMPBuilder.getOrCreateRuntimeFunction(
2505                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2506                           Args);
2507     OrderedOpGen.setAction(Action);
2508     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2509     return;
2510   }
2511   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2512 }
2513 
2514 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2515   unsigned Flags;
2516   if (Kind == OMPD_for)
2517     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2518   else if (Kind == OMPD_sections)
2519     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2520   else if (Kind == OMPD_single)
2521     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2522   else if (Kind == OMPD_barrier)
2523     Flags = OMP_IDENT_BARRIER_EXPL;
2524   else
2525     Flags = OMP_IDENT_BARRIER_IMPL;
2526   return Flags;
2527 }
2528 
2529 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2530     CodeGenFunction &CGF, const OMPLoopDirective &S,
2531     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2532   // Check if the loop directive is actually a doacross loop directive. In this
2533   // case choose static, 1 schedule.
2534   if (llvm::any_of(
2535           S.getClausesOfKind<OMPOrderedClause>(),
2536           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2537     ScheduleKind = OMPC_SCHEDULE_static;
2538     // Chunk size is 1 in this case.
2539     llvm::APInt ChunkSize(32, 1);
2540     ChunkExpr = IntegerLiteral::Create(
2541         CGF.getContext(), ChunkSize,
2542         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2543         SourceLocation());
2544   }
2545 }
2546 
2547 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2548                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2549                                       bool ForceSimpleCall) {
2550   // Check if we should use the OMPBuilder
2551   auto *OMPRegionInfo =
2552       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2553   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2554     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2555         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2556     return;
2557   }
2558 
2559   if (!CGF.HaveInsertPoint())
2560     return;
2561   // Build call __kmpc_cancel_barrier(loc, thread_id);
2562   // Build call __kmpc_barrier(loc, thread_id);
2563   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2564   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2565   // thread_id);
2566   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2567                          getThreadID(CGF, Loc)};
2568   if (OMPRegionInfo) {
2569     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2570       llvm::Value *Result = CGF.EmitRuntimeCall(
2571           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2572                                                 OMPRTL___kmpc_cancel_barrier),
2573           Args);
2574       if (EmitChecks) {
2575         // if (__kmpc_cancel_barrier()) {
2576         //   exit from construct;
2577         // }
2578         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2579         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2580         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2581         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2582         CGF.EmitBlock(ExitBB);
2583         //   exit from construct;
2584         CodeGenFunction::JumpDest CancelDestination =
2585             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2586         CGF.EmitBranchThroughCleanup(CancelDestination);
2587         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2588       }
2589       return;
2590     }
2591   }
2592   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2593                           CGM.getModule(), OMPRTL___kmpc_barrier),
2594                       Args);
2595 }
2596 
2597 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2598                                     Expr *ME, bool IsFatal) {
2599   llvm::Value *MVL =
2600       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2601          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2602   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2603   // *message)
2604   llvm::Value *Args[] = {
2605       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2606       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2607       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2608   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2609                           CGM.getModule(), OMPRTL___kmpc_error),
2610                       Args);
2611 }
2612 
2613 /// Map the OpenMP loop schedule to the runtime enumeration.
2614 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2615                                           bool Chunked, bool Ordered) {
2616   switch (ScheduleKind) {
2617   case OMPC_SCHEDULE_static:
2618     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2619                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2620   case OMPC_SCHEDULE_dynamic:
2621     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2622   case OMPC_SCHEDULE_guided:
2623     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2624   case OMPC_SCHEDULE_runtime:
2625     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2626   case OMPC_SCHEDULE_auto:
2627     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2628   case OMPC_SCHEDULE_unknown:
2629     assert(!Chunked && "chunk was specified but schedule kind not known");
2630     return Ordered ? OMP_ord_static : OMP_sch_static;
2631   }
2632   llvm_unreachable("Unexpected runtime schedule");
2633 }
2634 
2635 /// Map the OpenMP distribute schedule to the runtime enumeration.
2636 static OpenMPSchedType
2637 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2638   // only static is allowed for dist_schedule
2639   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2640 }
2641 
2642 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2643                                          bool Chunked) const {
2644   OpenMPSchedType Schedule =
2645       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2646   return Schedule == OMP_sch_static;
2647 }
2648 
2649 bool CGOpenMPRuntime::isStaticNonchunked(
2650     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2651   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2652   return Schedule == OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                       bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static_chunked;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticChunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static_chunked;
2666 }
2667 
2668 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2669   OpenMPSchedType Schedule =
2670       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2671   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2672   return Schedule != OMP_sch_static;
2673 }
2674 
2675 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2676                                   OpenMPScheduleClauseModifier M1,
2677                                   OpenMPScheduleClauseModifier M2) {
2678   int Modifier = 0;
2679   switch (M1) {
2680   case OMPC_SCHEDULE_MODIFIER_monotonic:
2681     Modifier = OMP_sch_modifier_monotonic;
2682     break;
2683   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2684     Modifier = OMP_sch_modifier_nonmonotonic;
2685     break;
2686   case OMPC_SCHEDULE_MODIFIER_simd:
2687     if (Schedule == OMP_sch_static_chunked)
2688       Schedule = OMP_sch_static_balanced_chunked;
2689     break;
2690   case OMPC_SCHEDULE_MODIFIER_last:
2691   case OMPC_SCHEDULE_MODIFIER_unknown:
2692     break;
2693   }
2694   switch (M2) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2710   // If the static schedule kind is specified or if the ordered clause is
2711   // specified, and if the nonmonotonic modifier is not specified, the effect is
2712   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2713   // modifier is specified, the effect is as if the nonmonotonic modifier is
2714   // specified.
2715   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2716     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2717           Schedule == OMP_sch_static_balanced_chunked ||
2718           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2719           Schedule == OMP_dist_sch_static_chunked ||
2720           Schedule == OMP_dist_sch_static))
2721       Modifier = OMP_sch_modifier_nonmonotonic;
2722   }
2723   return Schedule | Modifier;
2724 }
2725 
2726 void CGOpenMPRuntime::emitForDispatchInit(
2727     CodeGenFunction &CGF, SourceLocation Loc,
2728     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2729     bool Ordered, const DispatchRTInput &DispatchValues) {
2730   if (!CGF.HaveInsertPoint())
2731     return;
2732   OpenMPSchedType Schedule = getRuntimeSchedule(
2733       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2734   assert(Ordered ||
2735          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2736           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2737           Schedule != OMP_sch_static_balanced_chunked));
2738   // Call __kmpc_dispatch_init(
2739   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2740   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2741   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2742 
2743   // If the Chunk was not specified in the clause - use default value 1.
2744   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2745                                             : CGF.Builder.getIntN(IVSize, 1);
2746   llvm::Value *Args[] = {
2747       emitUpdateLocation(CGF, Loc),
2748       getThreadID(CGF, Loc),
2749       CGF.Builder.getInt32(addMonoNonMonoModifier(
2750           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2751       DispatchValues.LB,                                     // Lower
2752       DispatchValues.UB,                                     // Upper
2753       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2754       Chunk                                                  // Chunk
2755   };
2756   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2757 }
2758 
2759 static void emitForStaticInitCall(
2760     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2761     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2762     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2763     const CGOpenMPRuntime::StaticRTInput &Values) {
2764   if (!CGF.HaveInsertPoint())
2765     return;
2766 
2767   assert(!Values.Ordered);
2768   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2769          Schedule == OMP_sch_static_balanced_chunked ||
2770          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2771          Schedule == OMP_dist_sch_static ||
2772          Schedule == OMP_dist_sch_static_chunked);
2773 
2774   // Call __kmpc_for_static_init(
2775   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2776   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2777   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2778   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2779   llvm::Value *Chunk = Values.Chunk;
2780   if (Chunk == nullptr) {
2781     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2782             Schedule == OMP_dist_sch_static) &&
2783            "expected static non-chunked schedule");
2784     // If the Chunk was not specified in the clause - use default value 1.
2785     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2786   } else {
2787     assert((Schedule == OMP_sch_static_chunked ||
2788             Schedule == OMP_sch_static_balanced_chunked ||
2789             Schedule == OMP_ord_static_chunked ||
2790             Schedule == OMP_dist_sch_static_chunked) &&
2791            "expected static chunked schedule");
2792   }
2793   llvm::Value *Args[] = {
2794       UpdateLocation,
2795       ThreadId,
2796       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2797                                                   M2)), // Schedule type
2798       Values.IL.getPointer(),                           // &isLastIter
2799       Values.LB.getPointer(),                           // &LB
2800       Values.UB.getPointer(),                           // &UB
2801       Values.ST.getPointer(),                           // &Stride
2802       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2803       Chunk                                             // Chunk
2804   };
2805   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2806 }
2807 
2808 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2809                                         SourceLocation Loc,
2810                                         OpenMPDirectiveKind DKind,
2811                                         const OpenMPScheduleTy &ScheduleKind,
2812                                         const StaticRTInput &Values) {
2813   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2814       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2815   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2816          "Expected loop-based or sections-based directive.");
2817   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2818                                              isOpenMPLoopDirective(DKind)
2819                                                  ? OMP_IDENT_WORK_LOOP
2820                                                  : OMP_IDENT_WORK_SECTIONS);
2821   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2822   llvm::FunctionCallee StaticInitFunction =
2823       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2824   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2825   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2826                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2827 }
2828 
2829 void CGOpenMPRuntime::emitDistributeStaticInit(
2830     CodeGenFunction &CGF, SourceLocation Loc,
2831     OpenMPDistScheduleClauseKind SchedKind,
2832     const CGOpenMPRuntime::StaticRTInput &Values) {
2833   OpenMPSchedType ScheduleNum =
2834       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2835   llvm::Value *UpdatedLocation =
2836       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2837   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2838   llvm::FunctionCallee StaticInitFunction;
2839   bool isGPUDistribute =
2840       CGM.getLangOpts().OpenMPIsTargetDevice &&
2841       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2842   StaticInitFunction = createForStaticInitFunction(
2843       Values.IVSize, Values.IVSigned, isGPUDistribute);
2844 
2845   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2846                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2847                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2848 }
2849 
2850 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2851                                           SourceLocation Loc,
2852                                           OpenMPDirectiveKind DKind) {
2853   if (!CGF.HaveInsertPoint())
2854     return;
2855   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2856   llvm::Value *Args[] = {
2857       emitUpdateLocation(CGF, Loc,
2858                          isOpenMPDistributeDirective(DKind)
2859                              ? OMP_IDENT_WORK_DISTRIBUTE
2860                              : isOpenMPLoopDirective(DKind)
2861                                    ? OMP_IDENT_WORK_LOOP
2862                                    : OMP_IDENT_WORK_SECTIONS),
2863       getThreadID(CGF, Loc)};
2864   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2865   if (isOpenMPDistributeDirective(DKind) &&
2866       CGM.getLangOpts().OpenMPIsTargetDevice &&
2867       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2868     CGF.EmitRuntimeCall(
2869         OMPBuilder.getOrCreateRuntimeFunction(
2870             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2871         Args);
2872   else
2873     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2874                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2875                         Args);
2876 }
2877 
2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2879                                                  SourceLocation Loc,
2880                                                  unsigned IVSize,
2881                                                  bool IVSigned) {
2882   if (!CGF.HaveInsertPoint())
2883     return;
2884   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2885   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2886   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2887 }
2888 
2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2890                                           SourceLocation Loc, unsigned IVSize,
2891                                           bool IVSigned, Address IL,
2892                                           Address LB, Address UB,
2893                                           Address ST) {
2894   // Call __kmpc_dispatch_next(
2895   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2896   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2897   //          kmp_int[32|64] *p_stride);
2898   llvm::Value *Args[] = {
2899       emitUpdateLocation(CGF, Loc),
2900       getThreadID(CGF, Loc),
2901       IL.getPointer(), // &isLastIter
2902       LB.getPointer(), // &Lower
2903       UB.getPointer(), // &Upper
2904       ST.getPointer()  // &Stride
2905   };
2906   llvm::Value *Call =
2907       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2908   return CGF.EmitScalarConversion(
2909       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2910       CGF.getContext().BoolTy, Loc);
2911 }
2912 
2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2914                                            llvm::Value *NumThreads,
2915                                            SourceLocation Loc) {
2916   if (!CGF.HaveInsertPoint())
2917     return;
2918   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2919   llvm::Value *Args[] = {
2920       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2921       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2922   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2924                       Args);
2925 }
2926 
2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2928                                          ProcBindKind ProcBind,
2929                                          SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2933   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2937   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2943                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2944   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2945     OMPBuilder.createFlush(CGF.Builder);
2946   } else {
2947     if (!CGF.HaveInsertPoint())
2948       return;
2949     // Build call void __kmpc_flush(ident_t *loc)
2950     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2951                             CGM.getModule(), OMPRTL___kmpc_flush),
2952                         emitUpdateLocation(CGF, Loc));
2953   }
2954 }
2955 
namespace {
/// Positions of the fields inside the kmp_task_t record. The enumerators are
/// used as zero-based field indexes, so their order must match the order in
/// which the record's fields are created.
enum KmpTaskTFields {
  /// Pointer to the list of shared variables.
  KmpTaskTShareds = 0,
  /// The task routine.
  KmpTaskTRoutine = 1,
  /// Partition id used for untied tasks.
  KmpTaskTPartId = 2,
  /// Function that calls the destructors of private variables.
  Data1 = 3,
  /// Priority of the task.
  Data2 = 4,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound = 5,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound = 6,
  /// Stride (taskloops only).
  KmpTaskTStride = 7,
  /// "Is last iteration" flag (taskloops only).
  KmpTaskTLastIter = 8,
  /// Reduction data (taskloops only).
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2981 
2982 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2983   // If we are in simd mode or there are no entries, we don't need to do
2984   // anything.
2985   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2986     return;
2987 
2988   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2989       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2990              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2991     SourceLocation Loc;
2992     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2993       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2994                 E = CGM.getContext().getSourceManager().fileinfo_end();
2995            I != E; ++I) {
2996         if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2997             I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
2998           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2999               I->getFirst(), EntryInfo.Line, 1);
3000           break;
3001         }
3002       }
3003     }
3004     switch (Kind) {
3005     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
3006       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3007           DiagnosticsEngine::Error, "Offloading entry for target region in "
3008                                     "%0 is incorrect: either the "
3009                                     "address or the ID is invalid.");
3010       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
3011     } break;
3012     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
3013       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3014           DiagnosticsEngine::Error, "Offloading entry for declare target "
3015                                     "variable %0 is incorrect: the "
3016                                     "address is invalid.");
3017       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
3018     } break;
3019     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
3020       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3021           DiagnosticsEngine::Error,
3022           "Offloading entry for declare target variable is incorrect: the "
3023           "address is invalid.");
3024       CGM.getDiags().Report(DiagID);
3025     } break;
3026     }
3027   };
3028 
3029   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
3030 }
3031 
3032 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3033   if (!KmpRoutineEntryPtrTy) {
3034     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3035     ASTContext &C = CGM.getContext();
3036     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3037     FunctionProtoType::ExtProtoInfo EPI;
3038     KmpRoutineEntryPtrQTy = C.getPointerType(
3039         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3040     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3041   }
3042 }
3043 
3044 namespace {
3045 struct PrivateHelpersTy {
3046   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3047                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3048       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3049         PrivateElemInit(PrivateElemInit) {}
3050   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3051   const Expr *OriginalRef = nullptr;
3052   const VarDecl *Original = nullptr;
3053   const VarDecl *PrivateCopy = nullptr;
3054   const VarDecl *PrivateElemInit = nullptr;
3055   bool isLocalPrivate() const {
3056     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3057   }
3058 };
3059 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3060 } // anonymous namespace
3061 
3062 static bool isAllocatableDecl(const VarDecl *VD) {
3063   const VarDecl *CVD = VD->getCanonicalDecl();
3064   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3065     return false;
3066   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3067   // Use the default allocation.
3068   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3069            !AA->getAllocator());
3070 }
3071 
3072 static RecordDecl *
3073 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3074   if (!Privates.empty()) {
3075     ASTContext &C = CGM.getContext();
3076     // Build struct .kmp_privates_t. {
3077     //         /*  private vars  */
3078     //       };
3079     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3080     RD->startDefinition();
3081     for (const auto &Pair : Privates) {
3082       const VarDecl *VD = Pair.second.Original;
3083       QualType Type = VD->getType().getNonReferenceType();
3084       // If the private variable is a local variable with lvalue ref type,
3085       // allocate the pointer instead of the pointee type.
3086       if (Pair.second.isLocalPrivate()) {
3087         if (VD->getType()->isLValueReferenceType())
3088           Type = C.getPointerType(Type);
3089         if (isAllocatableDecl(VD))
3090           Type = C.getPointerType(Type);
3091       }
3092       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3093       if (VD->hasAttrs()) {
3094         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3095              E(VD->getAttrs().end());
3096              I != E; ++I)
3097           FD->addAttr(*I);
3098       }
3099     }
3100     RD->completeDefinition();
3101     return RD;
3102   }
3103   return nullptr;
3104 }
3105 
3106 static RecordDecl *
3107 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3108                          QualType KmpInt32Ty,
3109                          QualType KmpRoutineEntryPointerQTy) {
3110   ASTContext &C = CGM.getContext();
3111   // Build struct kmp_task_t {
3112   //         void *              shareds;
3113   //         kmp_routine_entry_t routine;
3114   //         kmp_int32           part_id;
3115   //         kmp_cmplrdata_t data1;
3116   //         kmp_cmplrdata_t data2;
3117   // For taskloops additional fields:
3118   //         kmp_uint64          lb;
3119   //         kmp_uint64          ub;
3120   //         kmp_int64           st;
3121   //         kmp_int32           liter;
3122   //         void *              reductions;
3123   //       };
3124   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3125   UD->startDefinition();
3126   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3127   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3128   UD->completeDefinition();
3129   QualType KmpCmplrdataTy = C.getRecordType(UD);
3130   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3131   RD->startDefinition();
3132   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3133   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3134   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3135   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3136   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3137   if (isOpenMPTaskLoopDirective(Kind)) {
3138     QualType KmpUInt64Ty =
3139         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3140     QualType KmpInt64Ty =
3141         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3142     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3143     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3144     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3145     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3146     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3147   }
3148   RD->completeDefinition();
3149   return RD;
3150 }
3151 
3152 static RecordDecl *
3153 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3154                                      ArrayRef<PrivateDataTy> Privates) {
3155   ASTContext &C = CGM.getContext();
3156   // Build struct kmp_task_t_with_privates {
3157   //         kmp_task_t task_data;
3158   //         .kmp_privates_t. privates;
3159   //       };
3160   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3161   RD->startDefinition();
3162   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3163   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3164     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3165   RD->completeDefinition();
3166   return RD;
3167 }
3168 
3169 /// Emit a proxy function which accepts kmp_task_t as the second
3170 /// argument.
3171 /// \code
3172 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3173 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3174 ///   For taskloops:
3175 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3176 ///   tt->reductions, tt->shareds);
3177 ///   return 0;
3178 /// }
3179 /// \endcode
3180 static llvm::Function *
3181 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3182                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3183                       QualType KmpTaskTWithPrivatesPtrQTy,
3184                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3185                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3186                       llvm::Value *TaskPrivatesMap) {
3187   ASTContext &C = CGM.getContext();
3188   FunctionArgList Args;
3189   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3190                             ImplicitParamDecl::Other);
3191   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3192                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3193                                 ImplicitParamDecl::Other);
3194   Args.push_back(&GtidArg);
3195   Args.push_back(&TaskTypeArg);
3196   const auto &TaskEntryFnInfo =
3197       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3198   llvm::FunctionType *TaskEntryTy =
3199       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3200   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3201   auto *TaskEntry = llvm::Function::Create(
3202       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3203   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3204   TaskEntry->setDoesNotRecurse();
3205   CodeGenFunction CGF(CGM);
3206   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3207                     Loc, Loc);
3208 
3209   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3210   // tt,
3211   // For taskloops:
3212   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3213   // tt->task_data.shareds);
3214   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3215       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3216   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3217       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3218       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3219   const auto *KmpTaskTWithPrivatesQTyRD =
3220       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3221   LValue Base =
3222       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3223   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3224   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3225   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3226   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3227 
3228   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3229   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3230   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3231       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3232       CGF.ConvertTypeForMem(SharedsPtrTy));
3233 
3234   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3235   llvm::Value *PrivatesParam;
3236   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3237     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3238     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3239         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3240   } else {
3241     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3242   }
3243 
3244   llvm::Value *CommonArgs[] = {
3245       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3246       CGF.Builder
3247           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3248                                                CGF.VoidPtrTy, CGF.Int8Ty)
3249           .getPointer()};
3250   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3251                                           std::end(CommonArgs));
3252   if (isOpenMPTaskLoopDirective(Kind)) {
3253     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3254     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3255     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3256     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3257     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3258     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3259     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3260     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3261     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3262     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3263     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3264     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3265     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3266     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3267     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3268     CallArgs.push_back(LBParam);
3269     CallArgs.push_back(UBParam);
3270     CallArgs.push_back(StParam);
3271     CallArgs.push_back(LIParam);
3272     CallArgs.push_back(RParam);
3273   }
3274   CallArgs.push_back(SharedsParam);
3275 
3276   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3277                                                   CallArgs);
3278   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3279                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3280   CGF.FinishFunction();
3281   return TaskEntry;
3282 }
3283 
3284 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3285                                             SourceLocation Loc,
3286                                             QualType KmpInt32Ty,
3287                                             QualType KmpTaskTWithPrivatesPtrQTy,
3288                                             QualType KmpTaskTWithPrivatesQTy) {
3289   ASTContext &C = CGM.getContext();
3290   FunctionArgList Args;
3291   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3292                             ImplicitParamDecl::Other);
3293   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3294                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3295                                 ImplicitParamDecl::Other);
3296   Args.push_back(&GtidArg);
3297   Args.push_back(&TaskTypeArg);
3298   const auto &DestructorFnInfo =
3299       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3300   llvm::FunctionType *DestructorFnTy =
3301       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3302   std::string Name =
3303       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3304   auto *DestructorFn =
3305       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3306                              Name, &CGM.getModule());
3307   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3308                                     DestructorFnInfo);
3309   DestructorFn->setDoesNotRecurse();
3310   CodeGenFunction CGF(CGM);
3311   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3312                     Args, Loc, Loc);
3313 
3314   LValue Base = CGF.EmitLoadOfPointerLValue(
3315       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3316       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3317   const auto *KmpTaskTWithPrivatesQTyRD =
3318       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3319   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3320   Base = CGF.EmitLValueForField(Base, *FI);
3321   for (const auto *Field :
3322        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3323     if (QualType::DestructionKind DtorKind =
3324             Field->getType().isDestructedType()) {
3325       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3326       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3327     }
3328   }
3329   CGF.FinishFunction();
3330   return DestructorFn;
3331 }
3332 
3333 /// Emit a privates mapping function for correct handling of private and
3334 /// firstprivate variables.
3335 /// \code
3336 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3337 /// **noalias priv1,...,  <tyn> **noalias privn) {
3338 ///   *priv1 = &.privates.priv1;
3339 ///   ...;
3340 ///   *privn = &.privates.privn;
3341 /// }
3342 /// \endcode
3343 static llvm::Value *
3344 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3345                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3346                                ArrayRef<PrivateDataTy> Privates) {
3347   ASTContext &C = CGM.getContext();
3348   FunctionArgList Args;
3349   ImplicitParamDecl TaskPrivatesArg(
3350       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3351       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3352       ImplicitParamDecl::Other);
3353   Args.push_back(&TaskPrivatesArg);
3354   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3355   unsigned Counter = 1;
3356   for (const Expr *E : Data.PrivateVars) {
3357     Args.push_back(ImplicitParamDecl::Create(
3358         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3359         C.getPointerType(C.getPointerType(E->getType()))
3360             .withConst()
3361             .withRestrict(),
3362         ImplicitParamDecl::Other));
3363     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3364     PrivateVarsPos[VD] = Counter;
3365     ++Counter;
3366   }
3367   for (const Expr *E : Data.FirstprivateVars) {
3368     Args.push_back(ImplicitParamDecl::Create(
3369         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3370         C.getPointerType(C.getPointerType(E->getType()))
3371             .withConst()
3372             .withRestrict(),
3373         ImplicitParamDecl::Other));
3374     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3375     PrivateVarsPos[VD] = Counter;
3376     ++Counter;
3377   }
3378   for (const Expr *E : Data.LastprivateVars) {
3379     Args.push_back(ImplicitParamDecl::Create(
3380         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3381         C.getPointerType(C.getPointerType(E->getType()))
3382             .withConst()
3383             .withRestrict(),
3384         ImplicitParamDecl::Other));
3385     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3386     PrivateVarsPos[VD] = Counter;
3387     ++Counter;
3388   }
3389   for (const VarDecl *VD : Data.PrivateLocals) {
3390     QualType Ty = VD->getType().getNonReferenceType();
3391     if (VD->getType()->isLValueReferenceType())
3392       Ty = C.getPointerType(Ty);
3393     if (isAllocatableDecl(VD))
3394       Ty = C.getPointerType(Ty);
3395     Args.push_back(ImplicitParamDecl::Create(
3396         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3397         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3398         ImplicitParamDecl::Other));
3399     PrivateVarsPos[VD] = Counter;
3400     ++Counter;
3401   }
3402   const auto &TaskPrivatesMapFnInfo =
3403       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3404   llvm::FunctionType *TaskPrivatesMapTy =
3405       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3406   std::string Name =
3407       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3408   auto *TaskPrivatesMap = llvm::Function::Create(
3409       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3410       &CGM.getModule());
3411   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3412                                     TaskPrivatesMapFnInfo);
3413   if (CGM.getLangOpts().Optimize) {
3414     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3415     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3416     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3417   }
3418   CodeGenFunction CGF(CGM);
3419   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3420                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3421 
3422   // *privi = &.privates.privi;
3423   LValue Base = CGF.EmitLoadOfPointerLValue(
3424       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3425       TaskPrivatesArg.getType()->castAs<PointerType>());
3426   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3427   Counter = 0;
3428   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3429     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3430     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3431     LValue RefLVal =
3432         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3433     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3434         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3435     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3436     ++Counter;
3437   }
3438   CGF.FinishFunction();
3439   return TaskPrivatesMap;
3440 }
3441 
3442 /// Emit initialization for private variables in task-based directives.
3443 static void emitPrivatesInit(CodeGenFunction &CGF,
3444                              const OMPExecutableDirective &D,
3445                              Address KmpTaskSharedsPtr, LValue TDBase,
3446                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3447                              QualType SharedsTy, QualType SharedsPtrTy,
3448                              const OMPTaskDataTy &Data,
3449                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3450   ASTContext &C = CGF.getContext();
3451   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3452   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3453   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3454                                  ? OMPD_taskloop
3455                                  : OMPD_task;
3456   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3457   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3458   LValue SrcBase;
3459   bool IsTargetTask =
3460       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3461       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3462   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3463   // PointersArray, SizesArray, and MappersArray. The original variables for
3464   // these arrays are not captured and we get their addresses explicitly.
3465   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3466       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3467     SrcBase = CGF.MakeAddrLValue(
3468         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3469             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3470             CGF.ConvertTypeForMem(SharedsTy)),
3471         SharedsTy);
3472   }
3473   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3474   for (const PrivateDataTy &Pair : Privates) {
3475     // Do not initialize private locals.
3476     if (Pair.second.isLocalPrivate()) {
3477       ++FI;
3478       continue;
3479     }
3480     const VarDecl *VD = Pair.second.PrivateCopy;
3481     const Expr *Init = VD->getAnyInitializer();
3482     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3483                              !CGF.isTrivialInitializer(Init)))) {
3484       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3485       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3486         const VarDecl *OriginalVD = Pair.second.Original;
3487         // Check if the variable is the target-based BasePointersArray,
3488         // PointersArray, SizesArray, or MappersArray.
3489         LValue SharedRefLValue;
3490         QualType Type = PrivateLValue.getType();
3491         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3492         if (IsTargetTask && !SharedField) {
3493           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3494                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3495                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3496                          ->getNumParams() == 0 &&
3497                  isa<TranslationUnitDecl>(
3498                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3499                          ->getDeclContext()) &&
3500                  "Expected artificial target data variable.");
3501           SharedRefLValue =
3502               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3503         } else if (ForDup) {
3504           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3505           SharedRefLValue = CGF.MakeAddrLValue(
3506               SharedRefLValue.getAddress(CGF).withAlignment(
3507                   C.getDeclAlign(OriginalVD)),
3508               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3509               SharedRefLValue.getTBAAInfo());
3510         } else if (CGF.LambdaCaptureFields.count(
3511                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3512                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3513           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3514         } else {
3515           // Processing for implicitly captured variables.
3516           InlinedOpenMPRegionRAII Region(
3517               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3518               /*HasCancel=*/false, /*NoInheritance=*/true);
3519           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3520         }
3521         if (Type->isArrayType()) {
3522           // Initialize firstprivate array.
3523           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3524             // Perform simple memcpy.
3525             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3526           } else {
3527             // Initialize firstprivate array using element-by-element
3528             // initialization.
3529             CGF.EmitOMPAggregateAssign(
3530                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3531                 Type,
3532                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3533                                                   Address SrcElement) {
3534                   // Clean up any temporaries needed by the initialization.
3535                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3536                   InitScope.addPrivate(Elem, SrcElement);
3537                   (void)InitScope.Privatize();
3538                   // Emit initialization for single element.
3539                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3540                       CGF, &CapturesInfo);
3541                   CGF.EmitAnyExprToMem(Init, DestElement,
3542                                        Init->getType().getQualifiers(),
3543                                        /*IsInitializer=*/false);
3544                 });
3545           }
3546         } else {
3547           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3548           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3549           (void)InitScope.Privatize();
3550           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3551           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3552                              /*capturedByInit=*/false);
3553         }
3554       } else {
3555         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3556       }
3557     }
3558     ++FI;
3559   }
3560 }
3561 
3562 /// Check if duplication function is required for taskloops.
3563 static bool checkInitIsRequired(CodeGenFunction &CGF,
3564                                 ArrayRef<PrivateDataTy> Privates) {
3565   bool InitRequired = false;
3566   for (const PrivateDataTy &Pair : Privates) {
3567     if (Pair.second.isLocalPrivate())
3568       continue;
3569     const VarDecl *VD = Pair.second.PrivateCopy;
3570     const Expr *Init = VD->getAnyInitializer();
3571     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3572                                     !CGF.isTrivialInitializer(Init));
3573     if (InitRequired)
3574       break;
3575   }
3576   return InitRequired;
3577 }
3578 
3579 
3580 /// Emit task_dup function (for initialization of
3581 /// private/firstprivate/lastprivate vars and last_iter flag)
3582 /// \code
3583 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3584 /// lastpriv) {
3585 /// // setup lastprivate flag
3586 ///    task_dst->last = lastpriv;
3587 /// // could be constructor calls here...
3588 /// }
3589 /// \endcode
3590 static llvm::Value *
3591 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3592                     const OMPExecutableDirective &D,
3593                     QualType KmpTaskTWithPrivatesPtrQTy,
3594                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3595                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3596                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3597                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3598   ASTContext &C = CGM.getContext();
3599   FunctionArgList Args;
3600   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3601                            KmpTaskTWithPrivatesPtrQTy,
3602                            ImplicitParamDecl::Other);
3603   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3604                            KmpTaskTWithPrivatesPtrQTy,
3605                            ImplicitParamDecl::Other);
3606   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3607                                 ImplicitParamDecl::Other);
3608   Args.push_back(&DstArg);
3609   Args.push_back(&SrcArg);
3610   Args.push_back(&LastprivArg);
3611   const auto &TaskDupFnInfo =
3612       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3613   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3614   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3615   auto *TaskDup = llvm::Function::Create(
3616       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3617   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3618   TaskDup->setDoesNotRecurse();
3619   CodeGenFunction CGF(CGM);
3620   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3621                     Loc);
3622 
3623   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3624       CGF.GetAddrOfLocalVar(&DstArg),
3625       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3626   // task_dst->liter = lastpriv;
3627   if (WithLastIter) {
3628     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3629     LValue Base = CGF.EmitLValueForField(
3630         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3631     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3632     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3633         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3634     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3635   }
3636 
3637   // Emit initial values for private copies (if any).
3638   assert(!Privates.empty());
3639   Address KmpTaskSharedsPtr = Address::invalid();
3640   if (!Data.FirstprivateVars.empty()) {
3641     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3642         CGF.GetAddrOfLocalVar(&SrcArg),
3643         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3644     LValue Base = CGF.EmitLValueForField(
3645         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3646     KmpTaskSharedsPtr = Address(
3647         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3648                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3649                                                   KmpTaskTShareds)),
3650                              Loc),
3651         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3652   }
3653   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3654                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3655   CGF.FinishFunction();
3656   return TaskDup;
3657 }
3658 
3659 /// Checks if destructor function is required to be generated.
3660 /// \return true if cleanups are required, false otherwise.
3661 static bool
3662 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3663                          ArrayRef<PrivateDataTy> Privates) {
3664   for (const PrivateDataTy &P : Privates) {
3665     if (P.second.isLocalPrivate())
3666       continue;
3667     QualType Ty = P.second.Original->getType().getNonReferenceType();
3668     if (Ty.isDestructedType())
3669       return true;
3670   }
3671   return false;
3672 }
3673 
3674 namespace {
3675 /// Loop generator for OpenMP iterator expression.
3676 class OMPIteratorGeneratorScope final
3677     : public CodeGenFunction::OMPPrivateScope {
3678   CodeGenFunction &CGF;
3679   const OMPIteratorExpr *E = nullptr;
3680   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3681   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3682   OMPIteratorGeneratorScope() = delete;
3683   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3684 
3685 public:
3686   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3687       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3688     if (!E)
3689       return;
3690     SmallVector<llvm::Value *, 4> Uppers;
3691     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3692       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3693       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3694       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3695       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3696       addPrivate(
3697           HelperData.CounterVD,
3698           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3699     }
3700     Privatize();
3701 
3702     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3703       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3704       LValue CLVal =
3705           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3706                              HelperData.CounterVD->getType());
3707       // Counter = 0;
3708       CGF.EmitStoreOfScalar(
3709           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3710           CLVal);
3711       CodeGenFunction::JumpDest &ContDest =
3712           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3713       CodeGenFunction::JumpDest &ExitDest =
3714           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3715       // N = <number-of_iterations>;
3716       llvm::Value *N = Uppers[I];
3717       // cont:
3718       // if (Counter < N) goto body; else goto exit;
3719       CGF.EmitBlock(ContDest.getBlock());
3720       auto *CVal =
3721           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3722       llvm::Value *Cmp =
3723           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3724               ? CGF.Builder.CreateICmpSLT(CVal, N)
3725               : CGF.Builder.CreateICmpULT(CVal, N);
3726       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3727       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3728       // body:
3729       CGF.EmitBlock(BodyBB);
3730       // Iteri = Begini + Counter * Stepi;
3731       CGF.EmitIgnoredExpr(HelperData.Update);
3732     }
3733   }
3734   ~OMPIteratorGeneratorScope() {
3735     if (!E)
3736       return;
3737     for (unsigned I = E->numOfIterators(); I > 0; --I) {
3738       // Counter = Counter + 1;
3739       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3740       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3741       // goto cont;
3742       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3743       // exit:
3744       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3745     }
3746   }
3747 };
3748 } // namespace
3749 
3750 static std::pair<llvm::Value *, llvm::Value *>
3751 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3752   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3753   llvm::Value *Addr;
3754   if (OASE) {
3755     const Expr *Base = OASE->getBase();
3756     Addr = CGF.EmitScalarExpr(Base);
3757   } else {
3758     Addr = CGF.EmitLValue(E).getPointer(CGF);
3759   }
3760   llvm::Value *SizeVal;
3761   QualType Ty = E->getType();
3762   if (OASE) {
3763     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3764     for (const Expr *SE : OASE->getDimensions()) {
3765       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3766       Sz = CGF.EmitScalarConversion(
3767           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3768       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3769     }
3770   } else if (const auto *ASE =
3771                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3772     LValue UpAddrLVal =
3773         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3774     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3775     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3776         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3777     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3778     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3779     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3780   } else {
3781     SizeVal = CGF.getTypeSize(Ty);
3782   }
3783   return std::make_pair(Addr, SizeVal);
3784 }
3785 
3786 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3787 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3788   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3789   if (KmpTaskAffinityInfoTy.isNull()) {
3790     RecordDecl *KmpAffinityInfoRD =
3791         C.buildImplicitRecord("kmp_task_affinity_info_t");
3792     KmpAffinityInfoRD->startDefinition();
3793     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3794     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3795     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3796     KmpAffinityInfoRD->completeDefinition();
3797     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3798   }
3799 }
3800 
3801 CGOpenMPRuntime::TaskResultTy
3802 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3803                               const OMPExecutableDirective &D,
3804                               llvm::Function *TaskFunction, QualType SharedsTy,
3805                               Address Shareds, const OMPTaskDataTy &Data) {
3806   ASTContext &C = CGM.getContext();
3807   llvm::SmallVector<PrivateDataTy, 4> Privates;
3808   // Aggregate privates and sort them by the alignment.
3809   const auto *I = Data.PrivateCopies.begin();
3810   for (const Expr *E : Data.PrivateVars) {
3811     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3812     Privates.emplace_back(
3813         C.getDeclAlign(VD),
3814         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3815                          /*PrivateElemInit=*/nullptr));
3816     ++I;
3817   }
3818   I = Data.FirstprivateCopies.begin();
3819   const auto *IElemInitRef = Data.FirstprivateInits.begin();
3820   for (const Expr *E : Data.FirstprivateVars) {
3821     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3822     Privates.emplace_back(
3823         C.getDeclAlign(VD),
3824         PrivateHelpersTy(
3825             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3826             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3827     ++I;
3828     ++IElemInitRef;
3829   }
3830   I = Data.LastprivateCopies.begin();
3831   for (const Expr *E : Data.LastprivateVars) {
3832     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3833     Privates.emplace_back(
3834         C.getDeclAlign(VD),
3835         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3836                          /*PrivateElemInit=*/nullptr));
3837     ++I;
3838   }
3839   for (const VarDecl *VD : Data.PrivateLocals) {
3840     if (isAllocatableDecl(VD))
3841       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3842     else
3843       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3844   }
3845   llvm::stable_sort(Privates,
3846                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
3847                       return L.first > R.first;
3848                     });
3849   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3850   // Build type kmp_routine_entry_t (if not built yet).
3851   emitKmpRoutineEntryT(KmpInt32Ty);
3852   // Build type kmp_task_t (if not built yet).
3853   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3854     if (SavedKmpTaskloopTQTy.isNull()) {
3855       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3856           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3857     }
3858     KmpTaskTQTy = SavedKmpTaskloopTQTy;
3859   } else {
3860     assert((D.getDirectiveKind() == OMPD_task ||
3861             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3862             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3863            "Expected taskloop, task or target directive");
3864     if (SavedKmpTaskTQTy.isNull()) {
3865       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3866           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3867     }
3868     KmpTaskTQTy = SavedKmpTaskTQTy;
3869   }
3870   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3871   // Build particular struct kmp_task_t for the given task.
3872   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3873       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3874   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3875   QualType KmpTaskTWithPrivatesPtrQTy =
3876       C.getPointerType(KmpTaskTWithPrivatesQTy);
3877   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3878   llvm::Type *KmpTaskTWithPrivatesPtrTy =
3879       KmpTaskTWithPrivatesTy->getPointerTo();
3880   llvm::Value *KmpTaskTWithPrivatesTySize =
3881       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3882   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3883 
3884   // Emit initial values for private copies (if any).
3885   llvm::Value *TaskPrivatesMap = nullptr;
3886   llvm::Type *TaskPrivatesMapTy =
3887       std::next(TaskFunction->arg_begin(), 3)->getType();
3888   if (!Privates.empty()) {
3889     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3890     TaskPrivatesMap =
3891         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3892     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3893         TaskPrivatesMap, TaskPrivatesMapTy);
3894   } else {
3895     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3896         cast<llvm::PointerType>(TaskPrivatesMapTy));
3897   }
3898   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3899   // kmp_task_t *tt);
3900   llvm::Function *TaskEntry = emitProxyTaskFunction(
3901       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3902       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3903       TaskPrivatesMap);
3904 
3905   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3906   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3907   // kmp_routine_entry_t *task_entry);
3908   // Task flags. Format is taken from
3909   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3910   // description of kmp_tasking_flags struct.
3911   enum {
3912     TiedFlag = 0x1,
3913     FinalFlag = 0x2,
3914     DestructorsFlag = 0x8,
3915     PriorityFlag = 0x20,
3916     DetachableFlag = 0x40,
3917   };
3918   unsigned Flags = Data.Tied ? TiedFlag : 0;
3919   bool NeedsCleanup = false;
3920   if (!Privates.empty()) {
3921     NeedsCleanup =
3922         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3923     if (NeedsCleanup)
3924       Flags = Flags | DestructorsFlag;
3925   }
3926   if (Data.Priority.getInt())
3927     Flags = Flags | PriorityFlag;
3928   if (D.hasClausesOfKind<OMPDetachClause>())
3929     Flags = Flags | DetachableFlag;
3930   llvm::Value *TaskFlags =
3931       Data.Final.getPointer()
3932           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3933                                      CGF.Builder.getInt32(FinalFlag),
3934                                      CGF.Builder.getInt32(/*C=*/0))
3935           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3936   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3937   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3938   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3939       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3940       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3941           TaskEntry, KmpRoutineEntryPtrTy)};
3942   llvm::Value *NewTask;
3943   if (D.hasClausesOfKind<OMPNowaitClause>()) {
3944     // Check if we have any device clause associated with the directive.
3945     const Expr *Device = nullptr;
3946     if (auto *C = D.getSingleClause<OMPDeviceClause>())
3947       Device = C->getDevice();
3948     // Emit device ID if any otherwise use default value.
3949     llvm::Value *DeviceID;
3950     if (Device)
3951       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3952                                            CGF.Int64Ty, /*isSigned=*/true);
3953     else
3954       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3955     AllocArgs.push_back(DeviceID);
3956     NewTask = CGF.EmitRuntimeCall(
3957         OMPBuilder.getOrCreateRuntimeFunction(
3958             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3959         AllocArgs);
3960   } else {
3961     NewTask =
3962         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3963                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3964                             AllocArgs);
3965   }
3966   // Emit detach clause initialization.
3967   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3968   // task_descriptor);
3969   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3970     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3971     LValue EvtLVal = CGF.EmitLValue(Evt);
3972 
3973     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3974     // int gtid, kmp_task_t *task);
3975     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3976     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3977     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3978     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3979         OMPBuilder.getOrCreateRuntimeFunction(
3980             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3981         {Loc, Tid, NewTask});
3982     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3983                                       Evt->getExprLoc());
3984     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3985   }
3986   // Process affinity clauses.
3987   if (D.hasClausesOfKind<OMPAffinityClause>()) {
3988     // Process list of affinity data.
3989     ASTContext &C = CGM.getContext();
3990     Address AffinitiesArray = Address::invalid();
3991     // Calculate number of elements to form the array of affinity data.
3992     llvm::Value *NumOfElements = nullptr;
3993     unsigned NumAffinities = 0;
3994     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3995       if (const Expr *Modifier = C->getModifier()) {
3996         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3997         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3998           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3999           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4000           NumOfElements =
4001               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4002         }
4003       } else {
4004         NumAffinities += C->varlist_size();
4005       }
4006     }
4007     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4008     // Fields ids in kmp_task_affinity_info record.
4009     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4010 
4011     QualType KmpTaskAffinityInfoArrayTy;
4012     if (NumOfElements) {
4013       NumOfElements = CGF.Builder.CreateNUWAdd(
4014           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4015       auto *OVE = new (C) OpaqueValueExpr(
4016           Loc,
4017           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4018           VK_PRValue);
4019       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4020                                                     RValue::get(NumOfElements));
4021       KmpTaskAffinityInfoArrayTy =
4022           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4023                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4024       // Properly emit variable-sized array.
4025       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4026                                            ImplicitParamDecl::Other);
4027       CGF.EmitVarDecl(*PD);
4028       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4029       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4030                                                 /*isSigned=*/false);
4031     } else {
4032       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4033           KmpTaskAffinityInfoTy,
4034           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4035           ArrayType::Normal, /*IndexTypeQuals=*/0);
4036       AffinitiesArray =
4037           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4038       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4039       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4040                                              /*isSigned=*/false);
4041     }
4042 
4043     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4044     // Fill array by elements without iterators.
4045     unsigned Pos = 0;
4046     bool HasIterator = false;
4047     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4048       if (C->getModifier()) {
4049         HasIterator = true;
4050         continue;
4051       }
4052       for (const Expr *E : C->varlists()) {
4053         llvm::Value *Addr;
4054         llvm::Value *Size;
4055         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4056         LValue Base =
4057             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4058                                KmpTaskAffinityInfoTy);
4059         // affs[i].base_addr = &<Affinities[i].second>;
4060         LValue BaseAddrLVal = CGF.EmitLValueForField(
4061             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4062         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4063                               BaseAddrLVal);
4064         // affs[i].len = sizeof(<Affinities[i].second>);
4065         LValue LenLVal = CGF.EmitLValueForField(
4066             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4067         CGF.EmitStoreOfScalar(Size, LenLVal);
4068         ++Pos;
4069       }
4070     }
4071     LValue PosLVal;
4072     if (HasIterator) {
4073       PosLVal = CGF.MakeAddrLValue(
4074           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4075           C.getSizeType());
4076       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4077     }
4078     // Process elements with iterators.
4079     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4080       const Expr *Modifier = C->getModifier();
4081       if (!Modifier)
4082         continue;
4083       OMPIteratorGeneratorScope IteratorScope(
4084           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4085       for (const Expr *E : C->varlists()) {
4086         llvm::Value *Addr;
4087         llvm::Value *Size;
4088         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4089         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4090         LValue Base = CGF.MakeAddrLValue(
4091             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4092         // affs[i].base_addr = &<Affinities[i].second>;
4093         LValue BaseAddrLVal = CGF.EmitLValueForField(
4094             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4095         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4096                               BaseAddrLVal);
4097         // affs[i].len = sizeof(<Affinities[i].second>);
4098         LValue LenLVal = CGF.EmitLValueForField(
4099             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4100         CGF.EmitStoreOfScalar(Size, LenLVal);
4101         Idx = CGF.Builder.CreateNUWAdd(
4102             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4103         CGF.EmitStoreOfScalar(Idx, PosLVal);
4104       }
4105     }
4106     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4107     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4108     // naffins, kmp_task_affinity_info_t *affin_list);
4109     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4110     llvm::Value *GTid = getThreadID(CGF, Loc);
4111     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4112         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4113     // FIXME: Emit the function and ignore its result for now unless the
4114     // runtime function is properly implemented.
4115     (void)CGF.EmitRuntimeCall(
4116         OMPBuilder.getOrCreateRuntimeFunction(
4117             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4118         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4119   }
4120   llvm::Value *NewTaskNewTaskTTy =
4121       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4122           NewTask, KmpTaskTWithPrivatesPtrTy);
4123   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4124                                                KmpTaskTWithPrivatesQTy);
4125   LValue TDBase =
4126       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4127   // Fill the data in the resulting kmp_task_t record.
4128   // Copy shareds if there are any.
4129   Address KmpTaskSharedsPtr = Address::invalid();
4130   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4131     KmpTaskSharedsPtr = Address(
4132         CGF.EmitLoadOfScalar(
4133             CGF.EmitLValueForField(
4134                 TDBase,
4135                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4136             Loc),
4137         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4138     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4139     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4140     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4141   }
4142   // Emit initial values for private copies (if any).
4143   TaskResultTy Result;
4144   if (!Privates.empty()) {
4145     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4146                      SharedsTy, SharedsPtrTy, Data, Privates,
4147                      /*ForDup=*/false);
4148     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4149         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4150       Result.TaskDupFn = emitTaskDupFunction(
4151           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4152           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4153           /*WithLastIter=*/!Data.LastprivateVars.empty());
4154     }
4155   }
4156   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4157   enum { Priority = 0, Destructors = 1 };
4158   // Provide pointer to function with destructors for privates.
4159   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4160   const RecordDecl *KmpCmplrdataUD =
4161       (*FI)->getType()->getAsUnionType()->getDecl();
4162   if (NeedsCleanup) {
4163     llvm::Value *DestructorFn = emitDestructorsFunction(
4164         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4165         KmpTaskTWithPrivatesQTy);
4166     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4167     LValue DestructorsLV = CGF.EmitLValueForField(
4168         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4169     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4170                               DestructorFn, KmpRoutineEntryPtrTy),
4171                           DestructorsLV);
4172   }
4173   // Set priority.
4174   if (Data.Priority.getInt()) {
4175     LValue Data2LV = CGF.EmitLValueForField(
4176         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4177     LValue PriorityLV = CGF.EmitLValueForField(
4178         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4179     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4180   }
4181   Result.NewTask = NewTask;
4182   Result.TaskEntry = TaskEntry;
4183   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4184   Result.TDBase = TDBase;
4185   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4186   return Result;
4187 }
4188 
4189 /// Translates internal dependency kind into the runtime kind.
4190 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4191   RTLDependenceKindTy DepKind;
4192   switch (K) {
4193   case OMPC_DEPEND_in:
4194     DepKind = RTLDependenceKindTy::DepIn;
4195     break;
4196   // Out and InOut dependencies must use the same code.
4197   case OMPC_DEPEND_out:
4198   case OMPC_DEPEND_inout:
4199     DepKind = RTLDependenceKindTy::DepInOut;
4200     break;
4201   case OMPC_DEPEND_mutexinoutset:
4202     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4203     break;
4204   case OMPC_DEPEND_inoutset:
4205     DepKind = RTLDependenceKindTy::DepInOutSet;
4206     break;
4207   case OMPC_DEPEND_outallmemory:
4208     DepKind = RTLDependenceKindTy::DepOmpAllMem;
4209     break;
4210   case OMPC_DEPEND_source:
4211   case OMPC_DEPEND_sink:
4212   case OMPC_DEPEND_depobj:
4213   case OMPC_DEPEND_inoutallmemory:
4214   case OMPC_DEPEND_unknown:
4215     llvm_unreachable("Unknown task dependence type");
4216   }
4217   return DepKind;
4218 }
4219 
4220 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4221 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4222                            QualType &FlagsTy) {
4223   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4224   if (KmpDependInfoTy.isNull()) {
4225     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4226     KmpDependInfoRD->startDefinition();
4227     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4228     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4229     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4230     KmpDependInfoRD->completeDefinition();
4231     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4232   }
4233 }
4234 
4235 std::pair<llvm::Value *, LValue>
4236 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4237                                    SourceLocation Loc) {
4238   ASTContext &C = CGM.getContext();
4239   QualType FlagsTy;
4240   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4241   RecordDecl *KmpDependInfoRD =
4242       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4243   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4244   LValue Base = CGF.EmitLoadOfPointerLValue(
4245       DepobjLVal.getAddress(CGF).withElementType(
4246           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4247       KmpDependInfoPtrTy->castAs<PointerType>());
4248   Address DepObjAddr = CGF.Builder.CreateGEP(
4249       Base.getAddress(CGF),
4250       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4251   LValue NumDepsBase = CGF.MakeAddrLValue(
4252       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4253   // NumDeps = deps[i].base_addr;
4254   LValue BaseAddrLVal = CGF.EmitLValueForField(
4255       NumDepsBase,
4256       *std::next(KmpDependInfoRD->field_begin(),
4257                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4258   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4259   return std::make_pair(NumDeps, Base);
4260 }
4261 
4262 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4263                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4264                            const OMPTaskDataTy::DependData &Data,
4265                            Address DependenciesArray) {
4266   CodeGenModule &CGM = CGF.CGM;
4267   ASTContext &C = CGM.getContext();
4268   QualType FlagsTy;
4269   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4270   RecordDecl *KmpDependInfoRD =
4271       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4272   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4273 
4274   OMPIteratorGeneratorScope IteratorScope(
4275       CGF, cast_or_null<OMPIteratorExpr>(
4276                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4277                                  : nullptr));
4278   for (const Expr *E : Data.DepExprs) {
4279     llvm::Value *Addr;
4280     llvm::Value *Size;
4281 
4282     // The expression will be a nullptr in the 'omp_all_memory' case.
4283     if (E) {
4284       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4285       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4286     } else {
4287       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4288       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4289     }
4290     LValue Base;
4291     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4292       Base = CGF.MakeAddrLValue(
4293           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4294     } else {
4295       assert(E && "Expected a non-null expression");
4296       LValue &PosLVal = *Pos.get<LValue *>();
4297       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4298       Base = CGF.MakeAddrLValue(
4299           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4300     }
4301     // deps[i].base_addr = &<Dependencies[i].second>;
4302     LValue BaseAddrLVal = CGF.EmitLValueForField(
4303         Base,
4304         *std::next(KmpDependInfoRD->field_begin(),
4305                    static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4306     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4307     // deps[i].len = sizeof(<Dependencies[i].second>);
4308     LValue LenLVal = CGF.EmitLValueForField(
4309         Base, *std::next(KmpDependInfoRD->field_begin(),
4310                          static_cast<unsigned int>(RTLDependInfoFields::Len)));
4311     CGF.EmitStoreOfScalar(Size, LenLVal);
4312     // deps[i].flags = <Dependencies[i].first>;
4313     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4314     LValue FlagsLVal = CGF.EmitLValueForField(
4315         Base,
4316         *std::next(KmpDependInfoRD->field_begin(),
4317                    static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4318     CGF.EmitStoreOfScalar(
4319         llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4320         FlagsLVal);
4321     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4322       ++(*P);
4323     } else {
4324       LValue &PosLVal = *Pos.get<LValue *>();
4325       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4326       Idx = CGF.Builder.CreateNUWAdd(Idx,
4327                                      llvm::ConstantInt::get(Idx->getType(), 1));
4328       CGF.EmitStoreOfScalar(Idx, PosLVal);
4329     }
4330   }
4331 }
4332 
4333 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4334     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4335     const OMPTaskDataTy::DependData &Data) {
4336   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4337          "Expected depobj dependency kind.");
4338   SmallVector<llvm::Value *, 4> Sizes;
4339   SmallVector<LValue, 4> SizeLVals;
4340   ASTContext &C = CGF.getContext();
4341   {
4342     OMPIteratorGeneratorScope IteratorScope(
4343         CGF, cast_or_null<OMPIteratorExpr>(
4344                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4345                                    : nullptr));
4346     for (const Expr *E : Data.DepExprs) {
4347       llvm::Value *NumDeps;
4348       LValue Base;
4349       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4350       std::tie(NumDeps, Base) =
4351           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4352       LValue NumLVal = CGF.MakeAddrLValue(
4353           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4354           C.getUIntPtrType());
4355       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4356                               NumLVal.getAddress(CGF));
4357       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4358       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4359       CGF.EmitStoreOfScalar(Add, NumLVal);
4360       SizeLVals.push_back(NumLVal);
4361     }
4362   }
4363   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4364     llvm::Value *Size =
4365         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4366     Sizes.push_back(Size);
4367   }
4368   return Sizes;
4369 }
4370 
4371 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4372                                          QualType &KmpDependInfoTy,
4373                                          LValue PosLVal,
4374                                          const OMPTaskDataTy::DependData &Data,
4375                                          Address DependenciesArray) {
4376   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4377          "Expected depobj dependency kind.");
4378   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4379   {
4380     OMPIteratorGeneratorScope IteratorScope(
4381         CGF, cast_or_null<OMPIteratorExpr>(
4382                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4383                                    : nullptr));
4384     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4385       const Expr *E = Data.DepExprs[I];
4386       llvm::Value *NumDeps;
4387       LValue Base;
4388       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4389       std::tie(NumDeps, Base) =
4390           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4391 
4392       // memcopy dependency data.
4393       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4394           ElSize,
4395           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4396       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4397       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4398       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4399 
4400       // Increase pos.
4401       // pos += size;
4402       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4403       CGF.EmitStoreOfScalar(Add, PosLVal);
4404     }
4405   }
4406 }
4407 
4408 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4409     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4410     SourceLocation Loc) {
4411   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4412         return D.DepExprs.empty();
4413       }))
4414     return std::make_pair(nullptr, Address::invalid());
4415   // Process list of dependencies.
4416   ASTContext &C = CGM.getContext();
4417   Address DependenciesArray = Address::invalid();
4418   llvm::Value *NumOfElements = nullptr;
4419   unsigned NumDependencies = std::accumulate(
4420       Dependencies.begin(), Dependencies.end(), 0,
4421       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4422         return D.DepKind == OMPC_DEPEND_depobj
4423                    ? V
4424                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4425       });
4426   QualType FlagsTy;
4427   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4428   bool HasDepobjDeps = false;
4429   bool HasRegularWithIterators = false;
4430   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4431   llvm::Value *NumOfRegularWithIterators =
4432       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4433   // Calculate number of depobj dependencies and regular deps with the
4434   // iterators.
4435   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4436     if (D.DepKind == OMPC_DEPEND_depobj) {
4437       SmallVector<llvm::Value *, 4> Sizes =
4438           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4439       for (llvm::Value *Size : Sizes) {
4440         NumOfDepobjElements =
4441             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4442       }
4443       HasDepobjDeps = true;
4444       continue;
4445     }
4446     // Include number of iterations, if any.
4447 
4448     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4449       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4450         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4451         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4452         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4453             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4454         NumOfRegularWithIterators =
4455             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4456       }
4457       HasRegularWithIterators = true;
4458       continue;
4459     }
4460   }
4461 
4462   QualType KmpDependInfoArrayTy;
4463   if (HasDepobjDeps || HasRegularWithIterators) {
4464     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4465                                            /*isSigned=*/false);
4466     if (HasDepobjDeps) {
4467       NumOfElements =
4468           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4469     }
4470     if (HasRegularWithIterators) {
4471       NumOfElements =
4472           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4473     }
4474     auto *OVE = new (C) OpaqueValueExpr(
4475         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4476         VK_PRValue);
4477     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4478                                                   RValue::get(NumOfElements));
4479     KmpDependInfoArrayTy =
4480         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4481                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4482     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4483     // Properly emit variable-sized array.
4484     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4485                                          ImplicitParamDecl::Other);
4486     CGF.EmitVarDecl(*PD);
4487     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4488     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4489                                               /*isSigned=*/false);
4490   } else {
4491     KmpDependInfoArrayTy = C.getConstantArrayType(
4492         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4493         ArrayType::Normal, /*IndexTypeQuals=*/0);
4494     DependenciesArray =
4495         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4496     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4497     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4498                                            /*isSigned=*/false);
4499   }
4500   unsigned Pos = 0;
4501   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4502     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4503         Dependencies[I].IteratorExpr)
4504       continue;
4505     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4506                    DependenciesArray);
4507   }
4508   // Copy regular dependencies with iterators.
4509   LValue PosLVal = CGF.MakeAddrLValue(
4510       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4511   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4512   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4513     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4514         !Dependencies[I].IteratorExpr)
4515       continue;
4516     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4517                    DependenciesArray);
4518   }
4519   // Copy final depobj arrays without iterators.
4520   if (HasDepobjDeps) {
4521     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4522       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4523         continue;
4524       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4525                          DependenciesArray);
4526     }
4527   }
4528   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4529       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4530   return std::make_pair(NumOfElements, DependenciesArray);
4531 }
4532 
4533 Address CGOpenMPRuntime::emitDepobjDependClause(
4534     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4535     SourceLocation Loc) {
4536   if (Dependencies.DepExprs.empty())
4537     return Address::invalid();
4538   // Process list of dependencies.
4539   ASTContext &C = CGM.getContext();
4540   Address DependenciesArray = Address::invalid();
4541   unsigned NumDependencies = Dependencies.DepExprs.size();
4542   QualType FlagsTy;
4543   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4544   RecordDecl *KmpDependInfoRD =
4545       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4546 
4547   llvm::Value *Size;
4548   // Define type kmp_depend_info[<Dependencies.size()>];
4549   // For depobj reserve one extra element to store the number of elements.
4550   // It is required to handle depobj(x) update(in) construct.
4551   // kmp_depend_info[<Dependencies.size()>] deps;
4552   llvm::Value *NumDepsVal;
4553   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4554   if (const auto *IE =
4555           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4556     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4557     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4558       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4559       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4560       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4561     }
4562     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4563                                     NumDepsVal);
4564     CharUnits SizeInBytes =
4565         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4566     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4567     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4568     NumDepsVal =
4569         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4570   } else {
4571     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4572         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4573         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4574     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4575     Size = CGM.getSize(Sz.alignTo(Align));
4576     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4577   }
4578   // Need to allocate on the dynamic memory.
4579   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4580   // Use default allocator.
4581   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4582   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4583 
4584   llvm::Value *Addr =
4585       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4586                               CGM.getModule(), OMPRTL___kmpc_alloc),
4587                           Args, ".dep.arr.addr");
4588   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4589   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4590       Addr, KmpDependInfoLlvmTy->getPointerTo());
4591   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4592   // Write number of elements in the first element of array for depobj.
4593   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4594   // deps[i].base_addr = NumDependencies;
4595   LValue BaseAddrLVal = CGF.EmitLValueForField(
4596       Base,
4597       *std::next(KmpDependInfoRD->field_begin(),
4598                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4599   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4600   llvm::PointerUnion<unsigned *, LValue *> Pos;
4601   unsigned Idx = 1;
4602   LValue PosLVal;
4603   if (Dependencies.IteratorExpr) {
4604     PosLVal = CGF.MakeAddrLValue(
4605         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4606         C.getSizeType());
4607     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4608                           /*IsInit=*/true);
4609     Pos = &PosLVal;
4610   } else {
4611     Pos = &Idx;
4612   }
4613   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4614   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4615       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4616       CGF.Int8Ty);
4617   return DependenciesArray;
4618 }
4619 
4620 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4621                                         SourceLocation Loc) {
4622   ASTContext &C = CGM.getContext();
4623   QualType FlagsTy;
4624   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4625   LValue Base = CGF.EmitLoadOfPointerLValue(
4626       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4627   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4628   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4629       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4630       CGF.ConvertTypeForMem(KmpDependInfoTy));
4631   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4632       Addr.getElementType(), Addr.getPointer(),
4633       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4634   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4635                                                                CGF.VoidPtrTy);
4636   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4637   // Use default allocator.
4638   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4639   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4640 
4641   // _kmpc_free(gtid, addr, nullptr);
4642   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4643                                 CGM.getModule(), OMPRTL___kmpc_free),
4644                             Args);
4645 }
4646 
4647 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4648                                        OpenMPDependClauseKind NewDepKind,
4649                                        SourceLocation Loc) {
4650   ASTContext &C = CGM.getContext();
4651   QualType FlagsTy;
4652   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4653   RecordDecl *KmpDependInfoRD =
4654       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4655   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4656   llvm::Value *NumDeps;
4657   LValue Base;
4658   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4659 
4660   Address Begin = Base.getAddress(CGF);
4661   // Cast from pointer to array type to pointer to single element.
4662   llvm::Value *End = CGF.Builder.CreateGEP(
4663       Begin.getElementType(), Begin.getPointer(), NumDeps);
4664   // The basic structure here is a while-do loop.
4665   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4666   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4667   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4668   CGF.EmitBlock(BodyBB);
4669   llvm::PHINode *ElementPHI =
4670       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4671   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4672   Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4673   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4674                             Base.getTBAAInfo());
4675   // deps[i].flags = NewDepKind;
4676   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4677   LValue FlagsLVal = CGF.EmitLValueForField(
4678       Base, *std::next(KmpDependInfoRD->field_begin(),
4679                        static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4680   CGF.EmitStoreOfScalar(
4681       llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4682       FlagsLVal);
4683 
4684   // Shift the address forward by one element.
4685   Address ElementNext =
4686       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4687   ElementPHI->addIncoming(ElementNext.getPointer(),
4688                           CGF.Builder.GetInsertBlock());
4689   llvm::Value *IsEmpty =
4690       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4691   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4692   // Done.
4693   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4694 }
4695 
4696 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4697                                    const OMPExecutableDirective &D,
4698                                    llvm::Function *TaskFunction,
4699                                    QualType SharedsTy, Address Shareds,
4700                                    const Expr *IfCond,
4701                                    const OMPTaskDataTy &Data) {
4702   if (!CGF.HaveInsertPoint())
4703     return;
4704 
4705   TaskResultTy Result =
4706       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4707   llvm::Value *NewTask = Result.NewTask;
4708   llvm::Function *TaskEntry = Result.TaskEntry;
4709   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4710   LValue TDBase = Result.TDBase;
4711   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4712   // Process list of dependences.
4713   Address DependenciesArray = Address::invalid();
4714   llvm::Value *NumOfElements;
4715   std::tie(NumOfElements, DependenciesArray) =
4716       emitDependClause(CGF, Data.Dependences, Loc);
4717 
4718   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4719   // libcall.
4720   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4721   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4722   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4723   // list is not empty
4724   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4725   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4726   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4727   llvm::Value *DepTaskArgs[7];
4728   if (!Data.Dependences.empty()) {
4729     DepTaskArgs[0] = UpLoc;
4730     DepTaskArgs[1] = ThreadID;
4731     DepTaskArgs[2] = NewTask;
4732     DepTaskArgs[3] = NumOfElements;
4733     DepTaskArgs[4] = DependenciesArray.getPointer();
4734     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4735     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4736   }
4737   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4738                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4739     if (!Data.Tied) {
4740       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4741       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4742       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4743     }
4744     if (!Data.Dependences.empty()) {
4745       CGF.EmitRuntimeCall(
4746           OMPBuilder.getOrCreateRuntimeFunction(
4747               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4748           DepTaskArgs);
4749     } else {
4750       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4751                               CGM.getModule(), OMPRTL___kmpc_omp_task),
4752                           TaskArgs);
4753     }
4754     // Check if parent region is untied and build return for untied task;
4755     if (auto *Region =
4756             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4757       Region->emitUntiedSwitch(CGF);
4758   };
4759 
4760   llvm::Value *DepWaitTaskArgs[7];
4761   if (!Data.Dependences.empty()) {
4762     DepWaitTaskArgs[0] = UpLoc;
4763     DepWaitTaskArgs[1] = ThreadID;
4764     DepWaitTaskArgs[2] = NumOfElements;
4765     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4766     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4767     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4768     DepWaitTaskArgs[6] =
4769         llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4770   }
4771   auto &M = CGM.getModule();
4772   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4773                         TaskEntry, &Data, &DepWaitTaskArgs,
4774                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4775     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4776     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4777     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4778     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4779     // is specified.
4780     if (!Data.Dependences.empty())
4781       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4782                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
4783                           DepWaitTaskArgs);
4784     // Call proxy_task_entry(gtid, new_task);
4785     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4786                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4787       Action.Enter(CGF);
4788       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4789       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4790                                                           OutlinedFnArgs);
4791     };
4792 
4793     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4794     // kmp_task_t *new_task);
4795     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4796     // kmp_task_t *new_task);
4797     RegionCodeGenTy RCG(CodeGen);
4798     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4799                               M, OMPRTL___kmpc_omp_task_begin_if0),
4800                           TaskArgs,
4801                           OMPBuilder.getOrCreateRuntimeFunction(
4802                               M, OMPRTL___kmpc_omp_task_complete_if0),
4803                           TaskArgs);
4804     RCG.setAction(Action);
4805     RCG(CGF);
4806   };
4807 
4808   if (IfCond) {
4809     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4810   } else {
4811     RegionCodeGenTy ThenRCG(ThenCodeGen);
4812     ThenRCG(CGF);
4813   }
4814 }
4815 
4816 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4817                                        const OMPLoopDirective &D,
4818                                        llvm::Function *TaskFunction,
4819                                        QualType SharedsTy, Address Shareds,
4820                                        const Expr *IfCond,
4821                                        const OMPTaskDataTy &Data) {
4822   if (!CGF.HaveInsertPoint())
4823     return;
4824   TaskResultTy Result =
4825       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4826   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4827   // libcall.
4828   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4829   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4830   // sched, kmp_uint64 grainsize, void *task_dup);
4831   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4832   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4833   llvm::Value *IfVal;
4834   if (IfCond) {
4835     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4836                                       /*isSigned=*/true);
4837   } else {
4838     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4839   }
4840 
4841   LValue LBLVal = CGF.EmitLValueForField(
4842       Result.TDBase,
4843       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4844   const auto *LBVar =
4845       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4846   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4847                        LBLVal.getQuals(),
4848                        /*IsInitializer=*/true);
4849   LValue UBLVal = CGF.EmitLValueForField(
4850       Result.TDBase,
4851       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4852   const auto *UBVar =
4853       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4854   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4855                        UBLVal.getQuals(),
4856                        /*IsInitializer=*/true);
4857   LValue StLVal = CGF.EmitLValueForField(
4858       Result.TDBase,
4859       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4860   const auto *StVar =
4861       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4862   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4863                        StLVal.getQuals(),
4864                        /*IsInitializer=*/true);
4865   // Store reductions address.
4866   LValue RedLVal = CGF.EmitLValueForField(
4867       Result.TDBase,
4868       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4869   if (Data.Reductions) {
4870     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4871   } else {
4872     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4873                                CGF.getContext().VoidPtrTy);
4874   }
4875   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4876   llvm::Value *TaskArgs[] = {
4877       UpLoc,
4878       ThreadID,
4879       Result.NewTask,
4880       IfVal,
4881       LBLVal.getPointer(CGF),
4882       UBLVal.getPointer(CGF),
4883       CGF.EmitLoadOfScalar(StLVal, Loc),
4884       llvm::ConstantInt::getSigned(
4885           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4886       llvm::ConstantInt::getSigned(
4887           CGF.IntTy, Data.Schedule.getPointer()
4888                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4889                          : NoSchedule),
4890       Data.Schedule.getPointer()
4891           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4892                                       /*isSigned=*/false)
4893           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4894       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4895                              Result.TaskDupFn, CGF.VoidPtrTy)
4896                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4897   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4898                           CGM.getModule(), OMPRTL___kmpc_taskloop),
4899                       TaskArgs);
4900 }
4901 
4902 /// Emit reduction operation for each element of array (required for
4903 /// array sections) LHS op = RHS.
4904 /// \param Type Type of array.
4905 /// \param LHSVar Variable on the left side of the reduction operation
4906 /// (references element of array in original variable).
4907 /// \param RHSVar Variable on the right side of the reduction operation
4908 /// (references element of array in original variable).
4909 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4910 /// RHSVar.
4911 static void EmitOMPAggregateReduction(
4912     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4913     const VarDecl *RHSVar,
4914     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4915                                   const Expr *, const Expr *)> &RedOpGen,
4916     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4917     const Expr *UpExpr = nullptr) {
4918   // Perform element-by-element initialization.
4919   QualType ElementTy;
4920   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4921   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4922 
4923   // Drill down to the base element type on both arrays.
4924   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4925   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4926 
4927   llvm::Value *RHSBegin = RHSAddr.getPointer();
4928   llvm::Value *LHSBegin = LHSAddr.getPointer();
4929   // Cast from pointer to array type to pointer to single element.
4930   llvm::Value *LHSEnd =
4931       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4932   // The basic structure here is a while-do loop.
4933   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4934   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4935   llvm::Value *IsEmpty =
4936       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4937   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4938 
4939   // Enter the loop body, making that address the current address.
4940   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4941   CGF.EmitBlock(BodyBB);
4942 
4943   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4944 
4945   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4946       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4947   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4948   Address RHSElementCurrent(
4949       RHSElementPHI, RHSAddr.getElementType(),
4950       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4951 
4952   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4953       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4954   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4955   Address LHSElementCurrent(
4956       LHSElementPHI, LHSAddr.getElementType(),
4957       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4958 
4959   // Emit copy.
4960   CodeGenFunction::OMPPrivateScope Scope(CGF);
4961   Scope.addPrivate(LHSVar, LHSElementCurrent);
4962   Scope.addPrivate(RHSVar, RHSElementCurrent);
4963   Scope.Privatize();
4964   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4965   Scope.ForceCleanup();
4966 
4967   // Shift the address forward by one element.
4968   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4969       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4970       "omp.arraycpy.dest.element");
4971   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4972       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4973       "omp.arraycpy.src.element");
4974   // Check whether we've reached the end.
4975   llvm::Value *Done =
4976       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4977   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4978   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4979   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4980 
4981   // Done.
4982   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4983 }
4984 
4985 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4986 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4987 /// UDR combiner function.
4988 static void emitReductionCombiner(CodeGenFunction &CGF,
4989                                   const Expr *ReductionOp) {
4990   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4991     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4992       if (const auto *DRE =
4993               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4994         if (const auto *DRD =
4995                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4996           std::pair<llvm::Function *, llvm::Function *> Reduction =
4997               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4998           RValue Func = RValue::get(Reduction.first);
4999           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5000           CGF.EmitIgnoredExpr(ReductionOp);
5001           return;
5002         }
5003   CGF.EmitIgnoredExpr(ReductionOp);
5004 }
5005 
5006 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5007     StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
5008     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5009     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5010   ASTContext &C = CGM.getContext();
5011 
5012   // void reduction_func(void *LHSArg, void *RHSArg);
5013   FunctionArgList Args;
5014   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5015                            ImplicitParamDecl::Other);
5016   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5017                            ImplicitParamDecl::Other);
5018   Args.push_back(&LHSArg);
5019   Args.push_back(&RHSArg);
5020   const auto &CGFI =
5021       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5022   std::string Name = getReductionFuncName(ReducerName);
5023   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5024                                     llvm::GlobalValue::InternalLinkage, Name,
5025                                     &CGM.getModule());
5026   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5027   Fn->setDoesNotRecurse();
5028   CodeGenFunction CGF(CGM);
5029   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5030 
5031   // Dst = (void*[n])(LHSArg);
5032   // Src = (void*[n])(RHSArg);
5033   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5034                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5035                   ArgsElemType->getPointerTo()),
5036               ArgsElemType, CGF.getPointerAlign());
5037   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5038                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5039                   ArgsElemType->getPointerTo()),
5040               ArgsElemType, CGF.getPointerAlign());
5041 
5042   //  ...
5043   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5044   //  ...
5045   CodeGenFunction::OMPPrivateScope Scope(CGF);
5046   const auto *IPriv = Privates.begin();
5047   unsigned Idx = 0;
5048   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5049     const auto *RHSVar =
5050         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5051     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5052     const auto *LHSVar =
5053         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5054     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5055     QualType PrivTy = (*IPriv)->getType();
5056     if (PrivTy->isVariablyModifiedType()) {
5057       // Get array size and emit VLA type.
5058       ++Idx;
5059       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5060       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5061       const VariableArrayType *VLA =
5062           CGF.getContext().getAsVariableArrayType(PrivTy);
5063       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5064       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5065           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5066       CGF.EmitVariablyModifiedType(PrivTy);
5067     }
5068   }
5069   Scope.Privatize();
5070   IPriv = Privates.begin();
5071   const auto *ILHS = LHSExprs.begin();
5072   const auto *IRHS = RHSExprs.begin();
5073   for (const Expr *E : ReductionOps) {
5074     if ((*IPriv)->getType()->isArrayType()) {
5075       // Emit reduction for array section.
5076       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5077       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5078       EmitOMPAggregateReduction(
5079           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5080           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5081             emitReductionCombiner(CGF, E);
5082           });
5083     } else {
5084       // Emit reduction for array subscript or single variable.
5085       emitReductionCombiner(CGF, E);
5086     }
5087     ++IPriv;
5088     ++ILHS;
5089     ++IRHS;
5090   }
5091   Scope.ForceCleanup();
5092   CGF.FinishFunction();
5093   return Fn;
5094 }
5095 
5096 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5097                                                   const Expr *ReductionOp,
5098                                                   const Expr *PrivateRef,
5099                                                   const DeclRefExpr *LHS,
5100                                                   const DeclRefExpr *RHS) {
5101   if (PrivateRef->getType()->isArrayType()) {
5102     // Emit reduction for array section.
5103     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5104     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5105     EmitOMPAggregateReduction(
5106         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5107         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5108           emitReductionCombiner(CGF, ReductionOp);
5109         });
5110   } else {
5111     // Emit reduction for array subscript or single variable.
5112     emitReductionCombiner(CGF, ReductionOp);
5113   }
5114 }
5115 
5116 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5117                                     ArrayRef<const Expr *> Privates,
5118                                     ArrayRef<const Expr *> LHSExprs,
5119                                     ArrayRef<const Expr *> RHSExprs,
5120                                     ArrayRef<const Expr *> ReductionOps,
5121                                     ReductionOptionsTy Options) {
5122   if (!CGF.HaveInsertPoint())
5123     return;
5124 
5125   bool WithNowait = Options.WithNowait;
5126   bool SimpleReduction = Options.SimpleReduction;
5127 
5128   // Next code should be emitted for reduction:
5129   //
5130   // static kmp_critical_name lock = { 0 };
5131   //
5132   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5133   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5134   //  ...
5135   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5136   //  *(Type<n>-1*)rhs[<n>-1]);
5137   // }
5138   //
5139   // ...
5140   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5141   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5142   // RedList, reduce_func, &<lock>)) {
5143   // case 1:
5144   //  ...
5145   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5146   //  ...
5147   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5148   // break;
5149   // case 2:
5150   //  ...
5151   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5152   //  ...
5153   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5154   // break;
5155   // default:;
5156   // }
5157   //
5158   // if SimpleReduction is true, only the next code is generated:
5159   //  ...
5160   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5161   //  ...
5162 
5163   ASTContext &C = CGM.getContext();
5164 
5165   if (SimpleReduction) {
5166     CodeGenFunction::RunCleanupsScope Scope(CGF);
5167     const auto *IPriv = Privates.begin();
5168     const auto *ILHS = LHSExprs.begin();
5169     const auto *IRHS = RHSExprs.begin();
5170     for (const Expr *E : ReductionOps) {
5171       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5172                                   cast<DeclRefExpr>(*IRHS));
5173       ++IPriv;
5174       ++ILHS;
5175       ++IRHS;
5176     }
5177     return;
5178   }
5179 
5180   // 1. Build a list of reduction variables.
5181   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5182   auto Size = RHSExprs.size();
5183   for (const Expr *E : Privates) {
5184     if (E->getType()->isVariablyModifiedType())
5185       // Reserve place for array size.
5186       ++Size;
5187   }
5188   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5189   QualType ReductionArrayTy =
5190       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5191                              /*IndexTypeQuals=*/0);
5192   Address ReductionList =
5193       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5194   const auto *IPriv = Privates.begin();
5195   unsigned Idx = 0;
5196   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5197     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5198     CGF.Builder.CreateStore(
5199         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5200             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5201         Elem);
5202     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5203       // Store array size.
5204       ++Idx;
5205       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5206       llvm::Value *Size = CGF.Builder.CreateIntCast(
5207           CGF.getVLASize(
5208                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5209               .NumElts,
5210           CGF.SizeTy, /*isSigned=*/false);
5211       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5212                               Elem);
5213     }
5214   }
5215 
5216   // 2. Emit reduce_func().
5217   llvm::Function *ReductionFn = emitReductionFunction(
5218       CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5219       Privates, LHSExprs, RHSExprs, ReductionOps);
5220 
5221   // 3. Create static kmp_critical_name lock = { 0 };
5222   std::string Name = getName({"reduction"});
5223   llvm::Value *Lock = getCriticalRegionLock(Name);
5224 
5225   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5226   // RedList, reduce_func, &<lock>);
5227   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5228   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5229   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5230   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5231       ReductionList.getPointer(), CGF.VoidPtrTy);
5232   llvm::Value *Args[] = {
5233       IdentTLoc,                             // ident_t *<loc>
5234       ThreadId,                              // i32 <gtid>
5235       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5236       ReductionArrayTySize,                  // size_type sizeof(RedList)
5237       RL,                                    // void *RedList
5238       ReductionFn, // void (*) (void *, void *) <reduce_func>
5239       Lock         // kmp_critical_name *&<lock>
5240   };
5241   llvm::Value *Res = CGF.EmitRuntimeCall(
5242       OMPBuilder.getOrCreateRuntimeFunction(
5243           CGM.getModule(),
5244           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5245       Args);
5246 
5247   // 5. Build switch(res)
5248   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5249   llvm::SwitchInst *SwInst =
5250       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5251 
5252   // 6. Build case 1:
5253   //  ...
5254   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5255   //  ...
5256   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5257   // break;
5258   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5259   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5260   CGF.EmitBlock(Case1BB);
5261 
5262   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5263   llvm::Value *EndArgs[] = {
5264       IdentTLoc, // ident_t *<loc>
5265       ThreadId,  // i32 <gtid>
5266       Lock       // kmp_critical_name *&<lock>
5267   };
5268   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5269                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5270     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5271     const auto *IPriv = Privates.begin();
5272     const auto *ILHS = LHSExprs.begin();
5273     const auto *IRHS = RHSExprs.begin();
5274     for (const Expr *E : ReductionOps) {
5275       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5276                                      cast<DeclRefExpr>(*IRHS));
5277       ++IPriv;
5278       ++ILHS;
5279       ++IRHS;
5280     }
5281   };
5282   RegionCodeGenTy RCG(CodeGen);
5283   CommonActionTy Action(
5284       nullptr, std::nullopt,
5285       OMPBuilder.getOrCreateRuntimeFunction(
5286           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5287                                       : OMPRTL___kmpc_end_reduce),
5288       EndArgs);
5289   RCG.setAction(Action);
5290   RCG(CGF);
5291 
5292   CGF.EmitBranch(DefaultBB);
5293 
5294   // 7. Build case 2:
5295   //  ...
5296   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5297   //  ...
5298   // break;
5299   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5300   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5301   CGF.EmitBlock(Case2BB);
5302 
5303   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5304                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5305     const auto *ILHS = LHSExprs.begin();
5306     const auto *IRHS = RHSExprs.begin();
5307     const auto *IPriv = Privates.begin();
5308     for (const Expr *E : ReductionOps) {
5309       const Expr *XExpr = nullptr;
5310       const Expr *EExpr = nullptr;
5311       const Expr *UpExpr = nullptr;
5312       BinaryOperatorKind BO = BO_Comma;
5313       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5314         if (BO->getOpcode() == BO_Assign) {
5315           XExpr = BO->getLHS();
5316           UpExpr = BO->getRHS();
5317         }
5318       }
5319       // Try to emit update expression as a simple atomic.
5320       const Expr *RHSExpr = UpExpr;
5321       if (RHSExpr) {
5322         // Analyze RHS part of the whole expression.
5323         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5324                 RHSExpr->IgnoreParenImpCasts())) {
5325           // If this is a conditional operator, analyze its condition for
5326           // min/max reduction operator.
5327           RHSExpr = ACO->getCond();
5328         }
5329         if (const auto *BORHS =
5330                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5331           EExpr = BORHS->getRHS();
5332           BO = BORHS->getOpcode();
5333         }
5334       }
5335       if (XExpr) {
5336         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5337         auto &&AtomicRedGen = [BO, VD,
5338                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5339                                     const Expr *EExpr, const Expr *UpExpr) {
5340           LValue X = CGF.EmitLValue(XExpr);
5341           RValue E;
5342           if (EExpr)
5343             E = CGF.EmitAnyExpr(EExpr);
5344           CGF.EmitOMPAtomicSimpleUpdateExpr(
5345               X, E, BO, /*IsXLHSInRHSPart=*/true,
5346               llvm::AtomicOrdering::Monotonic, Loc,
5347               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5348                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5349                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5350                 CGF.emitOMPSimpleStore(
5351                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5352                     VD->getType().getNonReferenceType(), Loc);
5353                 PrivateScope.addPrivate(VD, LHSTemp);
5354                 (void)PrivateScope.Privatize();
5355                 return CGF.EmitAnyExpr(UpExpr);
5356               });
5357         };
5358         if ((*IPriv)->getType()->isArrayType()) {
5359           // Emit atomic reduction for array section.
5360           const auto *RHSVar =
5361               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5362           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5363                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5364         } else {
5365           // Emit atomic reduction for array subscript or single variable.
5366           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5367         }
5368       } else {
5369         // Emit as a critical region.
5370         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5371                                            const Expr *, const Expr *) {
5372           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5373           std::string Name = RT.getName({"atomic_reduction"});
5374           RT.emitCriticalRegion(
5375               CGF, Name,
5376               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5377                 Action.Enter(CGF);
5378                 emitReductionCombiner(CGF, E);
5379               },
5380               Loc);
5381         };
5382         if ((*IPriv)->getType()->isArrayType()) {
5383           const auto *LHSVar =
5384               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5385           const auto *RHSVar =
5386               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5387           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5388                                     CritRedGen);
5389         } else {
5390           CritRedGen(CGF, nullptr, nullptr, nullptr);
5391         }
5392       }
5393       ++ILHS;
5394       ++IRHS;
5395       ++IPriv;
5396     }
5397   };
5398   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5399   if (!WithNowait) {
5400     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5401     llvm::Value *EndArgs[] = {
5402         IdentTLoc, // ident_t *<loc>
5403         ThreadId,  // i32 <gtid>
5404         Lock       // kmp_critical_name *&<lock>
5405     };
5406     CommonActionTy Action(nullptr, std::nullopt,
5407                           OMPBuilder.getOrCreateRuntimeFunction(
5408                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5409                           EndArgs);
5410     AtomicRCG.setAction(Action);
5411     AtomicRCG(CGF);
5412   } else {
5413     AtomicRCG(CGF);
5414   }
5415 
5416   CGF.EmitBranch(DefaultBB);
5417   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5418 }
5419 
5420 /// Generates unique name for artificial threadprivate variables.
5421 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5422 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5423                                       const Expr *Ref) {
5424   SmallString<256> Buffer;
5425   llvm::raw_svector_ostream Out(Buffer);
5426   const clang::DeclRefExpr *DE;
5427   const VarDecl *D = ::getBaseDecl(Ref, DE);
5428   if (!D)
5429     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5430   D = D->getCanonicalDecl();
5431   std::string Name = CGM.getOpenMPRuntime().getName(
5432       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5433   Out << Prefix << Name << "_"
5434       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5435   return std::string(Out.str());
5436 }
5437 
5438 /// Emits reduction initializer function:
5439 /// \code
5440 /// void @.red_init(void* %arg, void* %orig) {
5441 /// %0 = bitcast void* %arg to <type>*
5442 /// store <type> <init>, <type>* %0
5443 /// ret void
5444 /// }
5445 /// \endcode
5446 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5447                                            SourceLocation Loc,
5448                                            ReductionCodeGen &RCG, unsigned N) {
5449   ASTContext &C = CGM.getContext();
5450   QualType VoidPtrTy = C.VoidPtrTy;
5451   VoidPtrTy.addRestrict();
5452   FunctionArgList Args;
5453   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5454                           ImplicitParamDecl::Other);
5455   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5456                               ImplicitParamDecl::Other);
5457   Args.emplace_back(&Param);
5458   Args.emplace_back(&ParamOrig);
5459   const auto &FnInfo =
5460       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5461   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5462   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5463   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5464                                     Name, &CGM.getModule());
5465   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5466   Fn->setDoesNotRecurse();
5467   CodeGenFunction CGF(CGM);
5468   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5469   QualType PrivateType = RCG.getPrivateType(N);
5470   Address PrivateAddr = CGF.EmitLoadOfPointer(
5471       CGF.GetAddrOfLocalVar(&Param).withElementType(
5472           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5473       C.getPointerType(PrivateType)->castAs<PointerType>());
5474   llvm::Value *Size = nullptr;
5475   // If the size of the reduction item is non-constant, load it from global
5476   // threadprivate variable.
5477   if (RCG.getSizes(N).second) {
5478     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5479         CGF, CGM.getContext().getSizeType(),
5480         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5481     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5482                                 CGM.getContext().getSizeType(), Loc);
5483   }
5484   RCG.emitAggregateType(CGF, N, Size);
5485   Address OrigAddr = Address::invalid();
5486   // If initializer uses initializer from declare reduction construct, emit a
5487   // pointer to the address of the original reduction item (reuired by reduction
5488   // initializer)
5489   if (RCG.usesReductionInitializer(N)) {
5490     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5491     OrigAddr = CGF.EmitLoadOfPointer(
5492         SharedAddr,
5493         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5494   }
5495   // Emit the initializer:
5496   // %0 = bitcast void* %arg to <type>*
5497   // store <type> <init>, <type>* %0
5498   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5499                          [](CodeGenFunction &) { return false; });
5500   CGF.FinishFunction();
5501   return Fn;
5502 }
5503 
5504 /// Emits reduction combiner function:
5505 /// \code
5506 /// void @.red_comb(void* %arg0, void* %arg1) {
5507 /// %lhs = bitcast void* %arg0 to <type>*
5508 /// %rhs = bitcast void* %arg1 to <type>*
5509 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5510 /// store <type> %2, <type>* %lhs
5511 /// ret void
5512 /// }
5513 /// \endcode
5514 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5515                                            SourceLocation Loc,
5516                                            ReductionCodeGen &RCG, unsigned N,
5517                                            const Expr *ReductionOp,
5518                                            const Expr *LHS, const Expr *RHS,
5519                                            const Expr *PrivateRef) {
5520   ASTContext &C = CGM.getContext();
5521   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5522   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5523   FunctionArgList Args;
5524   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5525                                C.VoidPtrTy, ImplicitParamDecl::Other);
5526   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5527                             ImplicitParamDecl::Other);
5528   Args.emplace_back(&ParamInOut);
5529   Args.emplace_back(&ParamIn);
5530   const auto &FnInfo =
5531       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5532   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5533   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5534   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5535                                     Name, &CGM.getModule());
5536   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5537   Fn->setDoesNotRecurse();
5538   CodeGenFunction CGF(CGM);
5539   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5540   llvm::Value *Size = nullptr;
5541   // If the size of the reduction item is non-constant, load it from global
5542   // threadprivate variable.
5543   if (RCG.getSizes(N).second) {
5544     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5545         CGF, CGM.getContext().getSizeType(),
5546         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5547     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5548                                 CGM.getContext().getSizeType(), Loc);
5549   }
5550   RCG.emitAggregateType(CGF, N, Size);
5551   // Remap lhs and rhs variables to the addresses of the function arguments.
5552   // %lhs = bitcast void* %arg0 to <type>*
5553   // %rhs = bitcast void* %arg1 to <type>*
5554   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5555   PrivateScope.addPrivate(
5556       LHSVD,
5557       // Pull out the pointer to the variable.
5558       CGF.EmitLoadOfPointer(
5559           CGF.GetAddrOfLocalVar(&ParamInOut)
5560               .withElementType(
5561                   CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5562           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5563   PrivateScope.addPrivate(
5564       RHSVD,
5565       // Pull out the pointer to the variable.
5566       CGF.EmitLoadOfPointer(
5567           CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5568               CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5569           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5570   PrivateScope.Privatize();
5571   // Emit the combiner body:
5572   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5573   // store <type> %2, <type>* %lhs
5574   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5575       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5576       cast<DeclRefExpr>(RHS));
5577   CGF.FinishFunction();
5578   return Fn;
5579 }
5580 
5581 /// Emits reduction finalizer function:
5582 /// \code
5583 /// void @.red_fini(void* %arg) {
5584 /// %0 = bitcast void* %arg to <type>*
5585 /// <destroy>(<type>* %0)
5586 /// ret void
5587 /// }
5588 /// \endcode
5589 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5590                                            SourceLocation Loc,
5591                                            ReductionCodeGen &RCG, unsigned N) {
5592   if (!RCG.needCleanups(N))
5593     return nullptr;
5594   ASTContext &C = CGM.getContext();
5595   FunctionArgList Args;
5596   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5597                           ImplicitParamDecl::Other);
5598   Args.emplace_back(&Param);
5599   const auto &FnInfo =
5600       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5601   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5602   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5603   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5604                                     Name, &CGM.getModule());
5605   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5606   Fn->setDoesNotRecurse();
5607   CodeGenFunction CGF(CGM);
5608   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5609   Address PrivateAddr = CGF.EmitLoadOfPointer(
5610       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5611   llvm::Value *Size = nullptr;
5612   // If the size of the reduction item is non-constant, load it from global
5613   // threadprivate variable.
5614   if (RCG.getSizes(N).second) {
5615     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5616         CGF, CGM.getContext().getSizeType(),
5617         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5618     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5619                                 CGM.getContext().getSizeType(), Loc);
5620   }
5621   RCG.emitAggregateType(CGF, N, Size);
5622   // Emit the finalizer body:
5623   // <destroy>(<type>* %0)
5624   RCG.emitCleanups(CGF, N, PrivateAddr);
5625   CGF.FinishFunction(Loc);
5626   return Fn;
5627 }
5628 
5629 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5630     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5631     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5632   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5633     return nullptr;
5634 
5635   // Build typedef struct:
5636   // kmp_taskred_input {
5637   //   void *reduce_shar; // shared reduction item
5638   //   void *reduce_orig; // original reduction item used for initialization
5639   //   size_t reduce_size; // size of data item
5640   //   void *reduce_init; // data initialization routine
5641   //   void *reduce_fini; // data finalization routine
5642   //   void *reduce_comb; // data combiner routine
5643   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5644   // } kmp_taskred_input_t;
5645   ASTContext &C = CGM.getContext();
5646   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5647   RD->startDefinition();
5648   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5649   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5650   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5651   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5652   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5653   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5654   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5655       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5656   RD->completeDefinition();
5657   QualType RDType = C.getRecordType(RD);
5658   unsigned Size = Data.ReductionVars.size();
5659   llvm::APInt ArraySize(/*numBits=*/64, Size);
5660   QualType ArrayRDType = C.getConstantArrayType(
5661       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5662   // kmp_task_red_input_t .rd_input.[Size];
5663   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5664   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5665                        Data.ReductionCopies, Data.ReductionOps);
5666   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5667     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5668     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5669                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5670     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5671         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5672         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5673         ".rd_input.gep.");
5674     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5675     // ElemLVal.reduce_shar = &Shareds[Cnt];
5676     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5677     RCG.emitSharedOrigLValue(CGF, Cnt);
5678     llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5679     CGF.EmitStoreOfScalar(Shared, SharedLVal);
5680     // ElemLVal.reduce_orig = &Origs[Cnt];
5681     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5682     llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5683     CGF.EmitStoreOfScalar(Orig, OrigLVal);
5684     RCG.emitAggregateType(CGF, Cnt);
5685     llvm::Value *SizeValInChars;
5686     llvm::Value *SizeVal;
5687     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5688     // We use delayed creation/initialization for VLAs and array sections. It is
5689     // required because runtime does not provide the way to pass the sizes of
5690     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5691     // threadprivate global variables are used to store these values and use
5692     // them in the functions.
5693     bool DelayedCreation = !!SizeVal;
5694     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5695                                                /*isSigned=*/false);
5696     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5697     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5698     // ElemLVal.reduce_init = init;
5699     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5700     llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5701     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5702     // ElemLVal.reduce_fini = fini;
5703     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5704     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5705     llvm::Value *FiniAddr =
5706         Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5707     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5708     // ElemLVal.reduce_comb = comb;
5709     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5710     llvm::Value *CombAddr = emitReduceCombFunction(
5711         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5712         RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5713     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5714     // ElemLVal.flags = 0;
5715     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5716     if (DelayedCreation) {
5717       CGF.EmitStoreOfScalar(
5718           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5719           FlagsLVal);
5720     } else
5721       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5722                                  FlagsLVal.getType());
5723   }
5724   if (Data.IsReductionWithTaskMod) {
5725     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5726     // is_ws, int num, void *data);
5727     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5728     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5729                                                   CGM.IntTy, /*isSigned=*/true);
5730     llvm::Value *Args[] = {
5731         IdentTLoc, GTid,
5732         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5733                                /*isSigned=*/true),
5734         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5735         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5736             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5737     return CGF.EmitRuntimeCall(
5738         OMPBuilder.getOrCreateRuntimeFunction(
5739             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5740         Args);
5741   }
5742   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5743   llvm::Value *Args[] = {
5744       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5745                                 /*isSigned=*/true),
5746       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5747       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5748                                                       CGM.VoidPtrTy)};
5749   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5750                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
5751                              Args);
5752 }
5753 
5754 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5755                                             SourceLocation Loc,
5756                                             bool IsWorksharingReduction) {
5757   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5758   // is_ws, int num, void *data);
5759   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5760   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5761                                                 CGM.IntTy, /*isSigned=*/true);
5762   llvm::Value *Args[] = {IdentTLoc, GTid,
5763                          llvm::ConstantInt::get(CGM.IntTy,
5764                                                 IsWorksharingReduction ? 1 : 0,
5765                                                 /*isSigned=*/true)};
5766   (void)CGF.EmitRuntimeCall(
5767       OMPBuilder.getOrCreateRuntimeFunction(
5768           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5769       Args);
5770 }
5771 
5772 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5773                                               SourceLocation Loc,
5774                                               ReductionCodeGen &RCG,
5775                                               unsigned N) {
5776   auto Sizes = RCG.getSizes(N);
5777   // Emit threadprivate global variable if the type is non-constant
5778   // (Sizes.second = nullptr).
5779   if (Sizes.second) {
5780     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5781                                                      /*isSigned=*/false);
5782     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5783         CGF, CGM.getContext().getSizeType(),
5784         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5785     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5786   }
5787 }
5788 
5789 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5790                                               SourceLocation Loc,
5791                                               llvm::Value *ReductionsPtr,
5792                                               LValue SharedLVal) {
5793   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5794   // *d);
5795   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5796                                                    CGM.IntTy,
5797                                                    /*isSigned=*/true),
5798                          ReductionsPtr,
5799                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5800                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5801   return Address(
5802       CGF.EmitRuntimeCall(
5803           OMPBuilder.getOrCreateRuntimeFunction(
5804               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5805           Args),
5806       CGF.Int8Ty, SharedLVal.getAlignment());
5807 }
5808 
5809 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5810                                        const OMPTaskDataTy &Data) {
5811   if (!CGF.HaveInsertPoint())
5812     return;
5813 
5814   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5815     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5816     OMPBuilder.createTaskwait(CGF.Builder);
5817   } else {
5818     llvm::Value *ThreadID = getThreadID(CGF, Loc);
5819     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5820     auto &M = CGM.getModule();
5821     Address DependenciesArray = Address::invalid();
5822     llvm::Value *NumOfElements;
5823     std::tie(NumOfElements, DependenciesArray) =
5824         emitDependClause(CGF, Data.Dependences, Loc);
5825     if (!Data.Dependences.empty()) {
5826       llvm::Value *DepWaitTaskArgs[7];
5827       DepWaitTaskArgs[0] = UpLoc;
5828       DepWaitTaskArgs[1] = ThreadID;
5829       DepWaitTaskArgs[2] = NumOfElements;
5830       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5831       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5832       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5833       DepWaitTaskArgs[6] =
5834           llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5835 
5836       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5837 
5838       // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5839       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5840       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5841       // kmp_int32 has_no_wait); if dependence info is specified.
5842       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5843                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
5844                           DepWaitTaskArgs);
5845 
5846     } else {
5847 
5848       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5849       // global_tid);
5850       llvm::Value *Args[] = {UpLoc, ThreadID};
5851       // Ignore return result until untied tasks are supported.
5852       CGF.EmitRuntimeCall(
5853           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5854           Args);
5855     }
5856   }
5857 
5858   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5859     Region->emitUntiedSwitch(CGF);
5860 }
5861 
5862 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5863                                            OpenMPDirectiveKind InnerKind,
5864                                            const RegionCodeGenTy &CodeGen,
5865                                            bool HasCancel) {
5866   if (!CGF.HaveInsertPoint())
5867     return;
5868   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5869                                  InnerKind != OMPD_critical &&
5870                                      InnerKind != OMPD_master &&
5871                                      InnerKind != OMPD_masked);
5872   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5873 }
5874 
namespace {
/// Cancellation kinds; the numeric value is what gets passed as the
/// 'cncl_kind' argument of __kmpc_cancel and __kmpc_cancellationpoint.
enum RTCancelKind {
  /// Initial value used by getCancellationKind() before a kind is selected.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
5884 
5885 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5886   RTCancelKind CancelKind = CancelNoreq;
5887   if (CancelRegion == OMPD_parallel)
5888     CancelKind = CancelParallel;
5889   else if (CancelRegion == OMPD_for)
5890     CancelKind = CancelLoop;
5891   else if (CancelRegion == OMPD_sections)
5892     CancelKind = CancelSections;
5893   else {
5894     assert(CancelRegion == OMPD_taskgroup);
5895     CancelKind = CancelTaskgroup;
5896   }
5897   return CancelKind;
5898 }
5899 
5900 void CGOpenMPRuntime::emitCancellationPointCall(
5901     CodeGenFunction &CGF, SourceLocation Loc,
5902     OpenMPDirectiveKind CancelRegion) {
5903   if (!CGF.HaveInsertPoint())
5904     return;
5905   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5906   // global_tid, kmp_int32 cncl_kind);
5907   if (auto *OMPRegionInfo =
5908           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5909     // For 'cancellation point taskgroup', the task region info may not have a
5910     // cancel. This may instead happen in another adjacent task.
5911     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5912       llvm::Value *Args[] = {
5913           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5914           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5915       // Ignore return result until untied tasks are supported.
5916       llvm::Value *Result = CGF.EmitRuntimeCall(
5917           OMPBuilder.getOrCreateRuntimeFunction(
5918               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5919           Args);
5920       // if (__kmpc_cancellationpoint()) {
5921       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5922       //   exit from construct;
5923       // }
5924       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5925       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5926       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5927       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5928       CGF.EmitBlock(ExitBB);
5929       if (CancelRegion == OMPD_parallel)
5930         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5931       // exit from construct;
5932       CodeGenFunction::JumpDest CancelDest =
5933           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5934       CGF.EmitBranchThroughCleanup(CancelDest);
5935       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5936     }
5937   }
5938 }
5939 
5940 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5941                                      const Expr *IfCond,
5942                                      OpenMPDirectiveKind CancelRegion) {
5943   if (!CGF.HaveInsertPoint())
5944     return;
5945   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5946   // kmp_int32 cncl_kind);
5947   auto &M = CGM.getModule();
5948   if (auto *OMPRegionInfo =
5949           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5950     auto &&ThenGen = [this, &M, Loc, CancelRegion,
5951                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5952       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5953       llvm::Value *Args[] = {
5954           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5955           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5956       // Ignore return result until untied tasks are supported.
5957       llvm::Value *Result = CGF.EmitRuntimeCall(
5958           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5959       // if (__kmpc_cancel()) {
5960       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5961       //   exit from construct;
5962       // }
5963       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5964       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5965       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5966       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5967       CGF.EmitBlock(ExitBB);
5968       if (CancelRegion == OMPD_parallel)
5969         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5970       // exit from construct;
5971       CodeGenFunction::JumpDest CancelDest =
5972           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5973       CGF.EmitBranchThroughCleanup(CancelDest);
5974       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5975     };
5976     if (IfCond) {
5977       emitIfClause(CGF, IfCond, ThenGen,
5978                    [](CodeGenFunction &, PrePostActionTy &) {});
5979     } else {
5980       RegionCodeGenTy ThenRCG(ThenGen);
5981       ThenRCG(CGF);
5982     }
5983   }
5984 }
5985 
5986 namespace {
5987 /// Cleanup action for uses_allocators support.
5988 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5989   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5990 
5991 public:
5992   OMPUsesAllocatorsActionTy(
5993       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5994       : Allocators(Allocators) {}
5995   void Enter(CodeGenFunction &CGF) override {
5996     if (!CGF.HaveInsertPoint())
5997       return;
5998     for (const auto &AllocatorData : Allocators) {
5999       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6000           CGF, AllocatorData.first, AllocatorData.second);
6001     }
6002   }
6003   void Exit(CodeGenFunction &CGF) override {
6004     if (!CGF.HaveInsertPoint())
6005       return;
6006     for (const auto &AllocatorData : Allocators) {
6007       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6008                                                         AllocatorData.first);
6009     }
6010   }
6011 };
6012 } // namespace
6013 
6014 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6015     const OMPExecutableDirective &D, StringRef ParentName,
6016     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6017     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6018   assert(!ParentName.empty() && "Invalid target entry parent name!");
6019   HasEmittedTargetRegion = true;
6020   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6021   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6022     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6023       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6024       if (!D.AllocatorTraits)
6025         continue;
6026       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6027     }
6028   }
6029   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6030   CodeGen.setAction(UsesAllocatorAction);
6031   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6032                                    IsOffloadEntry, CodeGen);
6033 }
6034 
6035 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6036                                              const Expr *Allocator,
6037                                              const Expr *AllocatorTraits) {
6038   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6039   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6040   // Use default memspace handle.
6041   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6042   llvm::Value *NumTraits = llvm::ConstantInt::get(
6043       CGF.IntTy, cast<ConstantArrayType>(
6044                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6045                      ->getSize()
6046                      .getLimitedValue());
6047   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6048   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6049       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6050   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6051                                            AllocatorTraitsLVal.getBaseInfo(),
6052                                            AllocatorTraitsLVal.getTBAAInfo());
6053   llvm::Value *Traits = Addr.getPointer();
6054 
6055   llvm::Value *AllocatorVal =
6056       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6057                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6058                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6059   // Store to allocator.
6060   CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6061       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6062   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6063   AllocatorVal =
6064       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6065                                Allocator->getType(), Allocator->getExprLoc());
6066   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6067 }
6068 
6069 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6070                                              const Expr *Allocator) {
6071   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6072   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6073   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6074   llvm::Value *AllocatorVal =
6075       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6076   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6077                                           CGF.getContext().VoidPtrTy,
6078                                           Allocator->getExprLoc());
6079   (void)CGF.EmitRuntimeCall(
6080       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6081                                             OMPRTL___kmpc_destroy_allocator),
6082       {ThreadId, AllocatorVal});
6083 }
6084 
6085 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6086     const OMPExecutableDirective &D, StringRef ParentName,
6087     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6088     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6089 
6090   llvm::TargetRegionEntryInfo EntryInfo =
6091       getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6092 
6093   CodeGenFunction CGF(CGM, true);
6094   llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6095       [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6096         const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6097 
6098         CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6099         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6100         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6101       };
6102 
6103   // Get NumTeams and ThreadLimit attributes
6104   int32_t DefaultValTeams = -1;
6105   int32_t DefaultValThreads = -1;
6106   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6107   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6108 
6109   OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
6110                                       DefaultValTeams, DefaultValThreads,
6111                                       IsOffloadEntry, OutlinedFn, OutlinedFnID);
6112 
6113   if (OutlinedFn != nullptr)
6114     CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6115 }
6116 
6117 /// Checks if the expression is constant or does not have non-trivial function
6118 /// calls.
6119 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6120   // We can skip constant expressions.
6121   // We can skip expressions with trivial calls or simple expressions.
6122   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6123           !E->hasNonTrivialCall(Ctx)) &&
6124          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6125 }
6126 
6127 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6128                                                     const Stmt *Body) {
6129   const Stmt *Child = Body->IgnoreContainers();
6130   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6131     Child = nullptr;
6132     for (const Stmt *S : C->body()) {
6133       if (const auto *E = dyn_cast<Expr>(S)) {
6134         if (isTrivial(Ctx, E))
6135           continue;
6136       }
6137       // Some of the statements can be ignored.
6138       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6139           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6140         continue;
6141       // Analyze declarations.
6142       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6143         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6144               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6145                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6146                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6147                   isa<UsingDirectiveDecl>(D) ||
6148                   isa<OMPDeclareReductionDecl>(D) ||
6149                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6150                 return true;
6151               const auto *VD = dyn_cast<VarDecl>(D);
6152               if (!VD)
6153                 return false;
6154               return VD->hasGlobalStorage() || !VD->isUsed();
6155             }))
6156           continue;
6157       }
6158       // Found multiple children - cannot get the one child only.
6159       if (Child)
6160         return nullptr;
6161       Child = S;
6162     }
6163     if (Child)
6164       Child = Child->IgnoreContainers();
6165   }
6166   return Child;
6167 }
6168 
6169 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6170     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6171     int32_t &DefaultVal) {
6172 
6173   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6174   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6175          "Expected target-based executable directive.");
6176   switch (DirectiveKind) {
6177   case OMPD_target: {
6178     const auto *CS = D.getInnermostCapturedStmt();
6179     const auto *Body =
6180         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6181     const Stmt *ChildStmt =
6182         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6183     if (const auto *NestedDir =
6184             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6185       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6186         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6187           const Expr *NumTeams =
6188               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6189           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6190             if (auto Constant =
6191                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6192               DefaultVal = Constant->getExtValue();
6193           return NumTeams;
6194         }
6195         DefaultVal = 0;
6196         return nullptr;
6197       }
6198       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6199           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6200         DefaultVal = 1;
6201         return nullptr;
6202       }
6203       DefaultVal = 1;
6204       return nullptr;
6205     }
6206     // A value of -1 is used to check if we need to emit no teams region
6207     DefaultVal = -1;
6208     return nullptr;
6209   }
6210   case OMPD_target_teams_loop:
6211   case OMPD_target_teams:
6212   case OMPD_target_teams_distribute:
6213   case OMPD_target_teams_distribute_simd:
6214   case OMPD_target_teams_distribute_parallel_for:
6215   case OMPD_target_teams_distribute_parallel_for_simd: {
6216     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6217       const Expr *NumTeams =
6218           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6219       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6220         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6221           DefaultVal = Constant->getExtValue();
6222       return NumTeams;
6223     }
6224     DefaultVal = 0;
6225     return nullptr;
6226   }
6227   case OMPD_target_parallel:
6228   case OMPD_target_parallel_for:
6229   case OMPD_target_parallel_for_simd:
6230   case OMPD_target_parallel_loop:
6231   case OMPD_target_simd:
6232     DefaultVal = 1;
6233     return nullptr;
6234   case OMPD_parallel:
6235   case OMPD_for:
6236   case OMPD_parallel_for:
6237   case OMPD_parallel_loop:
6238   case OMPD_parallel_master:
6239   case OMPD_parallel_sections:
6240   case OMPD_for_simd:
6241   case OMPD_parallel_for_simd:
6242   case OMPD_cancel:
6243   case OMPD_cancellation_point:
6244   case OMPD_ordered:
6245   case OMPD_threadprivate:
6246   case OMPD_allocate:
6247   case OMPD_task:
6248   case OMPD_simd:
6249   case OMPD_tile:
6250   case OMPD_unroll:
6251   case OMPD_sections:
6252   case OMPD_section:
6253   case OMPD_single:
6254   case OMPD_master:
6255   case OMPD_critical:
6256   case OMPD_taskyield:
6257   case OMPD_barrier:
6258   case OMPD_taskwait:
6259   case OMPD_taskgroup:
6260   case OMPD_atomic:
6261   case OMPD_flush:
6262   case OMPD_depobj:
6263   case OMPD_scan:
6264   case OMPD_teams:
6265   case OMPD_target_data:
6266   case OMPD_target_exit_data:
6267   case OMPD_target_enter_data:
6268   case OMPD_distribute:
6269   case OMPD_distribute_simd:
6270   case OMPD_distribute_parallel_for:
6271   case OMPD_distribute_parallel_for_simd:
6272   case OMPD_teams_distribute:
6273   case OMPD_teams_distribute_simd:
6274   case OMPD_teams_distribute_parallel_for:
6275   case OMPD_teams_distribute_parallel_for_simd:
6276   case OMPD_target_update:
6277   case OMPD_declare_simd:
6278   case OMPD_declare_variant:
6279   case OMPD_begin_declare_variant:
6280   case OMPD_end_declare_variant:
6281   case OMPD_declare_target:
6282   case OMPD_end_declare_target:
6283   case OMPD_declare_reduction:
6284   case OMPD_declare_mapper:
6285   case OMPD_taskloop:
6286   case OMPD_taskloop_simd:
6287   case OMPD_master_taskloop:
6288   case OMPD_master_taskloop_simd:
6289   case OMPD_parallel_master_taskloop:
6290   case OMPD_parallel_master_taskloop_simd:
6291   case OMPD_requires:
6292   case OMPD_metadirective:
6293   case OMPD_unknown:
6294     break;
6295   default:
6296     break;
6297   }
6298   llvm_unreachable("Unexpected directive kind.");
6299 }
6300 
6301 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6302     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6303   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6304          "Clauses associated with the teams directive expected to be emitted "
6305          "only for the host!");
6306   CGBuilderTy &Bld = CGF.Builder;
6307   int32_t DefaultNT = -1;
6308   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6309   if (NumTeams != nullptr) {
6310     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6311 
6312     switch (DirectiveKind) {
6313     case OMPD_target: {
6314       const auto *CS = D.getInnermostCapturedStmt();
6315       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6316       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6317       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6318                                                   /*IgnoreResultAssign*/ true);
6319       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6320                              /*isSigned=*/true);
6321     }
6322     case OMPD_target_teams:
6323     case OMPD_target_teams_distribute:
6324     case OMPD_target_teams_distribute_simd:
6325     case OMPD_target_teams_distribute_parallel_for:
6326     case OMPD_target_teams_distribute_parallel_for_simd: {
6327       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6328       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6329                                                   /*IgnoreResultAssign*/ true);
6330       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6331                              /*isSigned=*/true);
6332     }
6333     default:
6334       break;
6335     }
6336   }
6337 
6338   return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6339 }
6340 
6341 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6342                                   llvm::Value *DefaultThreadLimitVal) {
6343   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6344       CGF.getContext(), CS->getCapturedStmt());
6345   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6346     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6347       llvm::Value *NumThreads = nullptr;
6348       llvm::Value *CondVal = nullptr;
6349       // Handle if clause. If if clause present, the number of threads is
6350       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6351       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6352         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6353         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6354         const OMPIfClause *IfClause = nullptr;
6355         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6356           if (C->getNameModifier() == OMPD_unknown ||
6357               C->getNameModifier() == OMPD_parallel) {
6358             IfClause = C;
6359             break;
6360           }
6361         }
6362         if (IfClause) {
6363           const Expr *Cond = IfClause->getCondition();
6364           bool Result;
6365           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6366             if (!Result)
6367               return CGF.Builder.getInt32(1);
6368           } else {
6369             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6370             if (const auto *PreInit =
6371                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6372               for (const auto *I : PreInit->decls()) {
6373                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6374                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6375                 } else {
6376                   CodeGenFunction::AutoVarEmission Emission =
6377                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6378                   CGF.EmitAutoVarCleanups(Emission);
6379                 }
6380               }
6381             }
6382             CondVal = CGF.EvaluateExprAsBool(Cond);
6383           }
6384         }
6385       }
6386       // Check the value of num_threads clause iff if clause was not specified
6387       // or is not evaluated to false.
6388       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6389         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6390         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6391         const auto *NumThreadsClause =
6392             Dir->getSingleClause<OMPNumThreadsClause>();
6393         CodeGenFunction::LexicalScope Scope(
6394             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6395         if (const auto *PreInit =
6396                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6397           for (const auto *I : PreInit->decls()) {
6398             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6399               CGF.EmitVarDecl(cast<VarDecl>(*I));
6400             } else {
6401               CodeGenFunction::AutoVarEmission Emission =
6402                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6403               CGF.EmitAutoVarCleanups(Emission);
6404             }
6405           }
6406         }
6407         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6408         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6409                                                /*isSigned=*/false);
6410         if (DefaultThreadLimitVal)
6411           NumThreads = CGF.Builder.CreateSelect(
6412               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6413               DefaultThreadLimitVal, NumThreads);
6414       } else {
6415         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6416                                            : CGF.Builder.getInt32(0);
6417       }
6418       // Process condition of the if clause.
6419       if (CondVal) {
6420         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6421                                               CGF.Builder.getInt32(1));
6422       }
6423       return NumThreads;
6424     }
6425     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6426       return CGF.Builder.getInt32(1);
6427   }
6428   return DefaultThreadLimitVal;
6429 }
6430 
6431 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6432     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6433     int32_t &DefaultVal) {
6434   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6435   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6436          "Expected target-based executable directive.");
6437 
6438   switch (DirectiveKind) {
6439   case OMPD_target:
6440     // Teams have no clause thread_limit
6441     return nullptr;
6442   case OMPD_target_teams:
6443   case OMPD_target_teams_distribute:
6444     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6445       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6446       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6447       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6448         if (auto Constant =
6449                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6450           DefaultVal = Constant->getExtValue();
6451       return ThreadLimit;
6452     }
6453     return nullptr;
6454   case OMPD_target_teams_loop:
6455   case OMPD_target_parallel_loop:
6456   case OMPD_target_parallel:
6457   case OMPD_target_parallel_for:
6458   case OMPD_target_parallel_for_simd:
6459   case OMPD_target_teams_distribute_parallel_for:
6460   case OMPD_target_teams_distribute_parallel_for_simd: {
6461     Expr *ThreadLimit = nullptr;
6462     Expr *NumThreads = nullptr;
6463     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6464       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6465       ThreadLimit = ThreadLimitClause->getThreadLimit();
6466       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6467         if (auto Constant =
6468                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6469           DefaultVal = Constant->getExtValue();
6470     }
6471     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6472       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6473       NumThreads = NumThreadsClause->getNumThreads();
6474       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6475         if (auto Constant =
6476                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6477           if (Constant->getExtValue() < DefaultVal) {
6478             DefaultVal = Constant->getExtValue();
6479             ThreadLimit = NumThreads;
6480           }
6481         }
6482       }
6483     }
6484     return ThreadLimit;
6485   }
6486   case OMPD_target_teams_distribute_simd:
6487   case OMPD_target_simd:
6488     DefaultVal = 1;
6489     return nullptr;
6490   case OMPD_parallel:
6491   case OMPD_for:
6492   case OMPD_parallel_for:
6493   case OMPD_parallel_master:
6494   case OMPD_parallel_sections:
6495   case OMPD_for_simd:
6496   case OMPD_parallel_for_simd:
6497   case OMPD_cancel:
6498   case OMPD_cancellation_point:
6499   case OMPD_ordered:
6500   case OMPD_threadprivate:
6501   case OMPD_allocate:
6502   case OMPD_task:
6503   case OMPD_simd:
6504   case OMPD_tile:
6505   case OMPD_unroll:
6506   case OMPD_sections:
6507   case OMPD_section:
6508   case OMPD_single:
6509   case OMPD_master:
6510   case OMPD_critical:
6511   case OMPD_taskyield:
6512   case OMPD_barrier:
6513   case OMPD_taskwait:
6514   case OMPD_taskgroup:
6515   case OMPD_atomic:
6516   case OMPD_flush:
6517   case OMPD_depobj:
6518   case OMPD_scan:
6519   case OMPD_teams:
6520   case OMPD_target_data:
6521   case OMPD_target_exit_data:
6522   case OMPD_target_enter_data:
6523   case OMPD_distribute:
6524   case OMPD_distribute_simd:
6525   case OMPD_distribute_parallel_for:
6526   case OMPD_distribute_parallel_for_simd:
6527   case OMPD_teams_distribute:
6528   case OMPD_teams_distribute_simd:
6529   case OMPD_teams_distribute_parallel_for:
6530   case OMPD_teams_distribute_parallel_for_simd:
6531   case OMPD_target_update:
6532   case OMPD_declare_simd:
6533   case OMPD_declare_variant:
6534   case OMPD_begin_declare_variant:
6535   case OMPD_end_declare_variant:
6536   case OMPD_declare_target:
6537   case OMPD_end_declare_target:
6538   case OMPD_declare_reduction:
6539   case OMPD_declare_mapper:
6540   case OMPD_taskloop:
6541   case OMPD_taskloop_simd:
6542   case OMPD_master_taskloop:
6543   case OMPD_master_taskloop_simd:
6544   case OMPD_parallel_master_taskloop:
6545   case OMPD_parallel_master_taskloop_simd:
6546   case OMPD_requires:
6547   case OMPD_unknown:
6548     break;
6549   default:
6550     break;
6551   }
6552   llvm_unreachable("Unsupported directive kind.");
6553 }
6554 
6555 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6556     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6557   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6558          "Clauses associated with the teams directive expected to be emitted "
6559          "only for the host!");
6560   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6561   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6562          "Expected target-based executable directive.");
6563   CGBuilderTy &Bld = CGF.Builder;
6564   llvm::Value *ThreadLimitVal = nullptr;
6565   llvm::Value *NumThreadsVal = nullptr;
6566   switch (DirectiveKind) {
6567   case OMPD_target: {
6568     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6569     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6570       return NumThreads;
6571     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6572         CGF.getContext(), CS->getCapturedStmt());
6573     // TODO: The standard is not clear how to resolve two thread limit clauses,
6574     //       let's pick the teams one if it's present, otherwise the target one.
6575     const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6576     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6577       if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6578         ThreadLimitClause = TLC;
6579         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6580         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6581         CodeGenFunction::LexicalScope Scope(
6582             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6583         if (const auto *PreInit =
6584                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6585           for (const auto *I : PreInit->decls()) {
6586             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6587               CGF.EmitVarDecl(cast<VarDecl>(*I));
6588             } else {
6589               CodeGenFunction::AutoVarEmission Emission =
6590                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6591               CGF.EmitAutoVarCleanups(Emission);
6592             }
6593           }
6594         }
6595       }
6596     }
6597     if (ThreadLimitClause) {
6598       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6599           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6600       ThreadLimitVal =
6601           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6602     }
6603     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6604       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6605           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6606         CS = Dir->getInnermostCapturedStmt();
6607         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6608             CGF.getContext(), CS->getCapturedStmt());
6609         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6610       }
6611       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6612           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6613         CS = Dir->getInnermostCapturedStmt();
6614         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6615           return NumThreads;
6616       }
6617       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6618         return Bld.getInt32(1);
6619     }
6620     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6621   }
6622   case OMPD_target_teams: {
6623     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6624       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6625       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6626       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6627           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6628       ThreadLimitVal =
6629           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6630     }
6631     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6632     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6633       return NumThreads;
6634     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6635         CGF.getContext(), CS->getCapturedStmt());
6636     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6637       if (Dir->getDirectiveKind() == OMPD_distribute) {
6638         CS = Dir->getInnermostCapturedStmt();
6639         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6640           return NumThreads;
6641       }
6642     }
6643     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6644   }
6645   case OMPD_target_teams_distribute:
6646     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6647       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6648       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6649       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6650           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6651       ThreadLimitVal =
6652           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6653     }
6654     if (llvm::Value *NumThreads =
6655             getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
6656       return NumThreads;
6657     return Bld.getInt32(0);
6658   case OMPD_target_teams_loop:
6659   case OMPD_target_parallel_loop:
6660   case OMPD_target_parallel:
6661   case OMPD_target_parallel_for:
6662   case OMPD_target_parallel_for_simd:
6663   case OMPD_target_teams_distribute_parallel_for:
6664   case OMPD_target_teams_distribute_parallel_for_simd: {
6665     llvm::Value *CondVal = nullptr;
6666     // Handle if clause. If if clause present, the number of threads is
6667     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6668     if (D.hasClausesOfKind<OMPIfClause>()) {
6669       const OMPIfClause *IfClause = nullptr;
6670       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6671         if (C->getNameModifier() == OMPD_unknown ||
6672             C->getNameModifier() == OMPD_parallel) {
6673           IfClause = C;
6674           break;
6675         }
6676       }
6677       if (IfClause) {
6678         const Expr *Cond = IfClause->getCondition();
6679         bool Result;
6680         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6681           if (!Result)
6682             return Bld.getInt32(1);
6683         } else {
6684           CodeGenFunction::RunCleanupsScope Scope(CGF);
6685           CondVal = CGF.EvaluateExprAsBool(Cond);
6686         }
6687       }
6688     }
6689     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6690       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6691       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6692       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6693           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6694       ThreadLimitVal =
6695           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6696     }
6697     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6698       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6699       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6700       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6701           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6702       NumThreadsVal =
6703           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6704       ThreadLimitVal = ThreadLimitVal
6705                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6706                                                                 ThreadLimitVal),
6707                                               NumThreadsVal, ThreadLimitVal)
6708                            : NumThreadsVal;
6709     }
6710     if (!ThreadLimitVal)
6711       ThreadLimitVal = Bld.getInt32(0);
6712     if (CondVal)
6713       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6714     return ThreadLimitVal;
6715   }
6716   case OMPD_target_teams_distribute_simd:
6717   case OMPD_target_simd:
6718     return Bld.getInt32(1);
6719   case OMPD_parallel:
6720   case OMPD_for:
6721   case OMPD_parallel_for:
6722   case OMPD_parallel_master:
6723   case OMPD_parallel_sections:
6724   case OMPD_for_simd:
6725   case OMPD_parallel_for_simd:
6726   case OMPD_cancel:
6727   case OMPD_cancellation_point:
6728   case OMPD_ordered:
6729   case OMPD_threadprivate:
6730   case OMPD_allocate:
6731   case OMPD_task:
6732   case OMPD_simd:
6733   case OMPD_tile:
6734   case OMPD_unroll:
6735   case OMPD_sections:
6736   case OMPD_section:
6737   case OMPD_single:
6738   case OMPD_master:
6739   case OMPD_critical:
6740   case OMPD_taskyield:
6741   case OMPD_barrier:
6742   case OMPD_taskwait:
6743   case OMPD_taskgroup:
6744   case OMPD_atomic:
6745   case OMPD_flush:
6746   case OMPD_depobj:
6747   case OMPD_scan:
6748   case OMPD_teams:
6749   case OMPD_target_data:
6750   case OMPD_target_exit_data:
6751   case OMPD_target_enter_data:
6752   case OMPD_distribute:
6753   case OMPD_distribute_simd:
6754   case OMPD_distribute_parallel_for:
6755   case OMPD_distribute_parallel_for_simd:
6756   case OMPD_teams_distribute:
6757   case OMPD_teams_distribute_simd:
6758   case OMPD_teams_distribute_parallel_for:
6759   case OMPD_teams_distribute_parallel_for_simd:
6760   case OMPD_target_update:
6761   case OMPD_declare_simd:
6762   case OMPD_declare_variant:
6763   case OMPD_begin_declare_variant:
6764   case OMPD_end_declare_variant:
6765   case OMPD_declare_target:
6766   case OMPD_end_declare_target:
6767   case OMPD_declare_reduction:
6768   case OMPD_declare_mapper:
6769   case OMPD_taskloop:
6770   case OMPD_taskloop_simd:
6771   case OMPD_master_taskloop:
6772   case OMPD_master_taskloop_simd:
6773   case OMPD_parallel_master_taskloop:
6774   case OMPD_parallel_master_taskloop_simd:
6775   case OMPD_requires:
6776   case OMPD_metadirective:
6777   case OMPD_unknown:
6778     break;
6779   default:
6780     break;
6781   }
6782   llvm_unreachable("Unsupported directive kind.");
6783 }
6784 
6785 namespace {
// Make the bitwise operators for bitmask enums available inside this
// anonymous namespace; the code below combines offload mapping flags with
// '|' and '|='.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6787 
6788 // Utility to handle information from clauses associated with a given
6789 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6790 // It provides a convenient interface to obtain the information and generate
6791 // code for that information.
6792 class MappableExprsHandler {
6793 public:
6794   /// Get the offset of the OMP_MAP_MEMBER_OF field.
6795   static unsigned getFlagMemberOffset() {
6796     unsigned Offset = 0;
6797     for (uint64_t Remain =
6798              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6799                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6800          !(Remain & 1); Remain = Remain >> 1)
6801       Offset++;
6802     return Offset;
6803   }
6804 
6805   /// Class that holds debugging information for a data mapping to be passed to
6806   /// the runtime library.
6807   class MappingExprInfo {
6808     /// The variable declaration used for the data mapping.
6809     const ValueDecl *MapDecl = nullptr;
6810     /// The original expression used in the map clause, or null if there is
6811     /// none.
6812     const Expr *MapExpr = nullptr;
6813 
6814   public:
6815     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6816         : MapDecl(MapDecl), MapExpr(MapExpr) {}
6817 
6818     const ValueDecl *getMapDecl() const { return MapDecl; }
6819     const Expr *getMapExpr() const { return MapExpr; }
6820   };
6821 
  // Shorthands for the containers used while collecting map information.
  // Most of them simply re-export types declared by llvm::OpenMPIRBuilder.
  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  // Note: aliases the same underlying type as MapValuesArrayTy below.
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  // Arrays local to this handler: per-entry debug info and declarations.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6831 
6832   /// This structure contains combined information generated for mappable
6833   /// clauses, including base pointers, pointers, sizes, map types, user-defined
6834   /// mappers, and non-contiguous information.
6835   struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6836     MapExprsArrayTy Exprs;
6837     MapValueDeclsArrayTy Mappers;
6838     MapValueDeclsArrayTy DevicePtrDecls;
6839 
6840     /// Append arrays in \a CurInfo.
6841     void append(MapCombinedInfoTy &CurInfo) {
6842       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6843       DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6844                             CurInfo.DevicePtrDecls.end());
6845       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6846       llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6847     }
6848   };
6849 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped element as a (field index, address) pair; the address
    // starts out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element as a (field index, address) pair; the address
    // starts out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Both addresses below start out invalid.
    // NOTE(review): LB presumably stands for "lower bound" -- confirm against
    // the code that fills this struct in.
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
6865 
6866 private:
  /// Record bundling what is kept for one component list: the components
  /// themselves, the map type, the map and motion modifiers, an optional
  /// mapper and variable reference, and flags such as whether a device
  /// pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // ArrayRef members are non-owning views: the storage they point to has to
    // outlive this record.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    // Member-wise constructor; the trailing three parameters are optional.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6893 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // NOTE(review): IE and VD are stored verbatim; presumably the expression
    // and the declaration of the deferred entry -- confirm against the users
    // of this struct.
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    // Presumably distinguishes use_device_addr from use_device_ptr entries --
    // TODO confirm.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6906 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and the map clause recorded for them
  /// (the mapped value is an OMPMapClause, not just a map type).
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6937 
6938   llvm::Value *getExprTypeSize(const Expr *E) const {
6939     QualType ExprTy = E->getType().getCanonicalType();
6940 
6941     // Calculate the size for array shaping expression.
6942     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6943       llvm::Value *Size =
6944           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6945       for (const Expr *SE : OAE->getDimensions()) {
6946         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6947         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6948                                       CGF.getContext().getSizeType(),
6949                                       SE->getExprLoc());
6950         Size = CGF.Builder.CreateNUWMul(Size, Sz);
6951       }
6952       return Size;
6953     }
6954 
6955     // Reference types are ignored for mapping purposes.
6956     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6957       ExprTy = RefTy->getPointeeType().getCanonicalType();
6958 
6959     // Given that an array section is considered a built-in type, we need to
6960     // do the calculation based on the length of the section instead of relying
6961     // on CGF.getTypeSize(E->getType()).
6962     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6963       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6964                             OAE->getBase()->IgnoreParenImpCasts())
6965                             .getCanonicalType();
6966 
6967       // If there is no length associated with the expression and lower bound is
6968       // not specified too, that means we are using the whole length of the
6969       // base.
6970       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6971           !OAE->getLowerBound())
6972         return CGF.getTypeSize(BaseTy);
6973 
6974       llvm::Value *ElemSize;
6975       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6976         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6977       } else {
6978         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6979         assert(ATy && "Expecting array type if not a pointer type.");
6980         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6981       }
6982 
6983       // If we don't have a length at this point, that is because we have an
6984       // array section with a single element.
6985       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6986         return ElemSize;
6987 
6988       if (const Expr *LenExpr = OAE->getLength()) {
6989         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6990         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6991                                              CGF.getContext().getSizeType(),
6992                                              LenExpr->getExprLoc());
6993         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6994       }
6995       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6996              OAE->getLowerBound() && "expected array_section[lb:].");
6997       // Size = sizetype - lb * elemtype;
6998       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6999       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7000       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7001                                        CGF.getContext().getSizeType(),
7002                                        OAE->getLowerBound()->getExprLoc());
7003       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7004       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7005       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7006       LengthVal = CGF.Builder.CreateSelect(
7007           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7008       return LengthVal;
7009     }
7010     return CGF.getTypeSize(ExprTy);
7011   }
7012 
7013   /// Return the corresponding bits for a given map clause modifier. Add
7014   /// a flag marking the map as a pointer if requested. Add a flag marking the
7015   /// map as the first one of a series of maps that relate to the same map
7016   /// expression.
7017   OpenMPOffloadMappingFlags getMapTypeBits(
7018       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7019       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7020       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7021     OpenMPOffloadMappingFlags Bits =
7022         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7023                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7024     switch (MapType) {
7025     case OMPC_MAP_alloc:
7026     case OMPC_MAP_release:
7027       // alloc and release is the default behavior in the runtime library,  i.e.
7028       // if we don't pass any bits alloc/release that is what the runtime is
7029       // going to do. Therefore, we don't need to signal anything for these two
7030       // type modifiers.
7031       break;
7032     case OMPC_MAP_to:
7033       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7034       break;
7035     case OMPC_MAP_from:
7036       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7037       break;
7038     case OMPC_MAP_tofrom:
7039       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7040               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7041       break;
7042     case OMPC_MAP_delete:
7043       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7044       break;
7045     case OMPC_MAP_unknown:
7046       llvm_unreachable("Unexpected map type!");
7047     }
7048     if (AddPtrFlag)
7049       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7050     if (AddIsTargetParamFlag)
7051       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7052     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7053       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7054     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7055       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7056     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7057         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7058       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7059     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7060       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7061     if (IsNonContiguous)
7062       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7063     return Bits;
7064   }
7065 
7066   /// Return true if the provided expression is a final array section. A
7067   /// final array section, is one whose length can't be proved to be one.
7068   bool isFinalArraySectionExpression(const Expr *E) const {
7069     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7070 
7071     // It is not an array section and therefore not a unity-size one.
7072     if (!OASE)
7073       return false;
7074 
7075     // An array section with no colon always refer to a single element.
7076     if (OASE->getColonLocFirst().isInvalid())
7077       return false;
7078 
7079     const Expr *Length = OASE->getLength();
7080 
7081     // If we don't have a length we have to check if the array has size 1
7082     // for this dimension. Also, we should always expect a length if the
7083     // base type is pointer.
7084     if (!Length) {
7085       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7086                              OASE->getBase()->IgnoreParenImpCasts())
7087                              .getCanonicalType();
7088       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7089         return ATy->getSize().getSExtValue() != 1;
7090       // If we don't have a constant dimension length, we have to consider
7091       // the current section as having any size, so it is not necessarily
7092       // unitary. If it happen to be unity size, that's user fault.
7093       return true;
7094     }
7095 
7096     // Check if the length evaluates to 1.
7097     Expr::EvalResult Result;
7098     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7099       return true; // Can have more that size 1.
7100 
7101     llvm::APSInt ConstLength = Result.Val.getInt();
7102     return ConstLength.getSExtValue() != 1;
7103   }
7104 
7105   /// Generate the base pointers, section pointers, sizes, map type bits, and
7106   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7107   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
7109   /// components is the first associated with a capture.
7110   void generateInfoForComponentList(
7111       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7112       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7113       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7114       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7115       bool IsFirstComponentList, bool IsImplicit,
7116       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7117       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7118       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7119           OverlappedElements = std::nullopt) const {
7120     // The following summarizes what has to be generated for each map and the
7121     // types below. The generated information is expressed in this order:
7122     // base pointer, section pointer, size, flags
7123     // (to add to the ones that come from the map type and modifier).
7124     //
7125     // double d;
7126     // int i[100];
7127     // float *p;
7128     // int **a = &i;
7129     //
7130     // struct S1 {
7131     //   int i;
7132     //   float f[50];
7133     // }
7134     // struct S2 {
7135     //   int i;
7136     //   float f[50];
7137     //   S1 s;
7138     //   double *p;
7139     //   struct S2 *ps;
7140     //   int &ref;
7141     // }
7142     // S2 s;
7143     // S2 *ps;
7144     //
7145     // map(d)
7146     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7147     //
7148     // map(i)
7149     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7150     //
7151     // map(i[1:23])
7152     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7153     //
7154     // map(p)
7155     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7156     //
7157     // map(p[1:24])
7158     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7159     // in unified shared memory mode or for local pointers
7160     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7161     //
7162     // map((*a)[0:3])
7163     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7164     // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7165     //
7166     // map(**a)
7167     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7168     // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7169     //
7170     // map(s)
7171     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7172     //
7173     // map(s.i)
7174     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7175     //
7176     // map(s.s.f)
7177     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7178     //
7179     // map(s.p)
7180     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7181     //
7182     // map(to: s.p[:22])
7183     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7184     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7185     // &(s.p), &(s.p[0]), 22*sizeof(double),
7186     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7187     // (*) alloc space for struct members, only this is a target parameter
7188     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7189     //      optimizes this entry out, same in the examples below)
7190     // (***) map the pointee (map: to)
7191     //
7192     // map(to: s.ref)
7193     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7194     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7195     // (*) alloc space for struct members, only this is a target parameter
7196     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7197     //      optimizes this entry out, same in the examples below)
7198     // (***) map the pointee (map: to)
7199     //
7200     // map(s.ps)
7201     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7202     //
7203     // map(from: s.ps->s.i)
7204     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7205     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7206     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7207     //
7208     // map(to: s.ps->ps)
7209     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7210     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7211     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7212     //
7213     // map(s.ps->ps->ps)
7214     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7215     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7216     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7217     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7218     //
7219     // map(to: s.ps->ps->s.f[:22])
7220     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7221     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7222     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7223     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7224     //
7225     // map(ps)
7226     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7227     //
7228     // map(ps->i)
7229     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7230     //
7231     // map(ps->s.f)
7232     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7233     //
7234     // map(from: ps->p)
7235     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7236     //
7237     // map(to: ps->p[:22])
7238     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7239     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7240     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7241     //
7242     // map(ps->ps)
7243     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7244     //
7245     // map(from: ps->ps->s.i)
7246     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7247     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7248     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7249     //
7250     // map(from: ps->ps->ps)
7251     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7252     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7253     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7254     //
7255     // map(ps->ps->ps->ps)
7256     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7257     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7258     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7259     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7260     //
7261     // map(to: ps->ps->ps->s.f[:22])
7262     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7263     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7264     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7265     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7266     //
7267     // map(to: s.f[:22]) map(from: s.p[:33])
7268     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7270     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7271     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7272     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7273     // (*) allocate contiguous space needed to fit all mapped members even if
7274     //     we allocate space for members not mapped (in this example,
7275     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7276     //     them as well because they fall between &s.f[0] and &s.p)
7277     //
7278     // map(from: s.f[:22]) map(to: ps->p[:33])
7279     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7280     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7281     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7282     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7283     // (*) the struct this entry pertains to is the 2nd element in the list of
7284     //     arguments, hence MEMBER_OF(2)
7285     //
7286     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7287     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7288     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7289     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7290     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7291     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7292     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7293     // (*) the struct this entry pertains to is the 4th element in the list
7294     //     of arguments, hence MEMBER_OF(4)
7295 
7296     // Track if the map information being generated is the first for a capture.
7297     bool IsCaptureFirstInfo = IsFirstComponentList;
7298     // When the variable is on a declare target link or in a to clause with
7299     // unified memory, a reference is needed to hold the host/device address
7300     // of the variable.
7301     bool RequiresReference = false;
7302 
7303     // Scan the components from the base to the complete expression.
7304     auto CI = Components.rbegin();
7305     auto CE = Components.rend();
7306     auto I = CI;
7307 
7308     // Track if the map information being generated is the first for a list of
7309     // components.
7310     bool IsExpressionFirstInfo = true;
7311     bool FirstPointerInComplexData = false;
7312     Address BP = Address::invalid();
7313     const Expr *AssocExpr = I->getAssociatedExpression();
7314     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7315     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7316     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7317 
7318     if (isa<MemberExpr>(AssocExpr)) {
7319       // The base is the 'this' pointer. The content of the pointer is going
7320       // to be the base of the field being mapped.
7321       BP = CGF.LoadCXXThisAddress();
7322     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7323                (OASE &&
7324                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7325       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7326     } else if (OAShE &&
7327                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7328       BP = Address(
7329           CGF.EmitScalarExpr(OAShE->getBase()),
7330           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7331           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7332     } else {
7333       // The base is the reference to the variable.
7334       // BP = &Var.
7335       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7336       if (const auto *VD =
7337               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7338         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7339                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7340           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7341               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7342                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7343                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7344             RequiresReference = true;
7345             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7346           }
7347         }
7348       }
7349 
7350       // If the variable is a pointer and is being dereferenced (i.e. is not
7351       // the last component), the base has to be the pointer itself, not its
7352       // reference. References are ignored for mapping purposes.
7353       QualType Ty =
7354           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7355       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7356         // No need to generate individual map information for the pointer, it
7357         // can be associated with the combined storage if shared memory mode is
7358         // active or the base declaration is not global variable.
7359         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7360         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7361             !VD || VD->hasLocalStorage())
7362           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7363         else
7364           FirstPointerInComplexData = true;
7365         ++I;
7366       }
7367     }
7368 
7369     // Track whether a component of the list should be marked as MEMBER_OF some
7370     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7371     // in a component list should be marked as MEMBER_OF, all subsequent entries
7372     // do not belong to the base struct. E.g.
7373     // struct S2 s;
7374     // s.ps->ps->ps->f[:]
7375     //   (1) (2) (3) (4)
7376     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7377     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7378     // is the pointee of ps(2) which is not member of struct s, so it should not
7379     // be marked as such (it is still PTR_AND_OBJ).
7380     // The variable is initialized to false so that PTR_AND_OBJ entries which
7381     // are not struct members are not considered (e.g. array of pointers to
7382     // data).
7383     bool ShouldBeMemberOf = false;
7384 
7385     // Variable keeping track of whether or not we have encountered a component
7386     // in the component list which is a member expression. Useful when we have a
7387     // pointer or a final array section, in which case it is the previous
7388     // component in the list which tells us whether we have a member expression.
7389     // E.g. X.f[:]
7390     // While processing the final array section "[:]" it is "f" which tells us
7391     // whether we are dealing with a member of a declared struct.
7392     const MemberExpr *EncounteredME = nullptr;
7393 
7394     // Track for the total number of dimension. Start from one for the dummy
7395     // dimension.
7396     uint64_t DimSize = 1;
7397 
7398     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7399     bool IsPrevMemberReference = false;
7400 
7401     for (; I != CE; ++I) {
7402       // If the current component is member of a struct (parent struct) mark it.
7403       if (!EncounteredME) {
7404         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7405         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7406         // as MEMBER_OF the parent struct.
7407         if (EncounteredME) {
7408           ShouldBeMemberOf = true;
7409           // Do not emit as complex pointer if this is actually not array-like
7410           // expression.
7411           if (FirstPointerInComplexData) {
7412             QualType Ty = std::prev(I)
7413                               ->getAssociatedDeclaration()
7414                               ->getType()
7415                               .getNonReferenceType();
7416             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7417             FirstPointerInComplexData = false;
7418           }
7419         }
7420       }
7421 
7422       auto Next = std::next(I);
7423 
7424       // We need to generate the addresses and sizes if this is the last
7425       // component, if the component is a pointer or if it is an array section
7426       // whose length can't be proved to be one. If this is a pointer, it
7427       // becomes the base address for the following components.
7428 
7429       // A final array section, is one whose length can't be proved to be one.
7430       // If the map item is non-contiguous then we don't treat any array section
7431       // as final array section.
7432       bool IsFinalArraySection =
7433           !IsNonContiguous &&
7434           isFinalArraySectionExpression(I->getAssociatedExpression());
7435 
7436       // If we have a declaration for the mapping use that, otherwise use
7437       // the base declaration of the map clause.
7438       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7439                                      ? I->getAssociatedDeclaration()
7440                                      : BaseDecl;
7441       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7442                                                : MapExpr;
7443 
7444       // Get information on whether the element is a pointer. Have to do a
7445       // special treatment for array sections given that they are built-in
7446       // types.
7447       const auto *OASE =
7448           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7449       const auto *OAShE =
7450           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7451       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7452       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7453       bool IsPointer =
7454           OAShE ||
7455           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7456                        .getCanonicalType()
7457                        ->isAnyPointerType()) ||
7458           I->getAssociatedExpression()->getType()->isAnyPointerType();
7459       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7460                                MapDecl &&
7461                                MapDecl->getType()->isLValueReferenceType();
7462       bool IsNonDerefPointer = IsPointer &&
7463                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7464                                !IsNonContiguous;
7465 
7466       if (OASE)
7467         ++DimSize;
7468 
7469       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7470           IsFinalArraySection) {
7471         // If this is not the last component, we expect the pointer to be
7472         // associated with an array expression or member expression.
7473         assert((Next == CE ||
7474                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7475                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7476                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7477                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7478                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7479                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7480                "Unexpected expression");
7481 
7482         Address LB = Address::invalid();
7483         Address LowestElem = Address::invalid();
7484         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7485                                        const MemberExpr *E) {
7486           const Expr *BaseExpr = E->getBase();
7487           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7488           // scalar.
7489           LValue BaseLV;
7490           if (E->isArrow()) {
7491             LValueBaseInfo BaseInfo;
7492             TBAAAccessInfo TBAAInfo;
7493             Address Addr =
7494                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7495             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7496             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7497           } else {
7498             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7499           }
7500           return BaseLV;
7501         };
7502         if (OAShE) {
7503           LowestElem = LB =
7504               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7505                       CGF.ConvertTypeForMem(
7506                           OAShE->getBase()->getType()->getPointeeType()),
7507                       CGF.getContext().getTypeAlignInChars(
7508                           OAShE->getBase()->getType()));
7509         } else if (IsMemberReference) {
7510           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7511           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7512           LowestElem = CGF.EmitLValueForFieldInitialization(
7513                               BaseLVal, cast<FieldDecl>(MapDecl))
7514                            .getAddress(CGF);
7515           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7516                    .getAddress(CGF);
7517         } else {
7518           LowestElem = LB =
7519               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7520                   .getAddress(CGF);
7521         }
7522 
7523         // If this component is a pointer inside the base struct then we don't
7524         // need to create any entry for it - it will be combined with the object
7525         // it is pointing to into a single PTR_AND_OBJ entry.
7526         bool IsMemberPointerOrAddr =
7527             EncounteredME &&
7528             (((IsPointer || ForDeviceAddr) &&
7529               I->getAssociatedExpression() == EncounteredME) ||
7530              (IsPrevMemberReference && !IsPointer) ||
7531              (IsMemberReference && Next != CE &&
7532               !Next->getAssociatedExpression()->getType()->isPointerType()));
7533         if (!OverlappedElements.empty() && Next == CE) {
7534           // Handle base element with the info for overlapped elements.
7535           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7536           assert(!IsPointer &&
7537                  "Unexpected base element with the pointer type.");
7538           // Mark the whole struct as the struct that requires allocation on the
7539           // device.
7540           PartialStruct.LowestElem = {0, LowestElem};
7541           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7542               I->getAssociatedExpression()->getType());
7543           Address HB = CGF.Builder.CreateConstGEP(
7544               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7545                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7546               TypeSize.getQuantity() - 1);
7547           PartialStruct.HighestElem = {
7548               std::numeric_limits<decltype(
7549                   PartialStruct.HighestElem.first)>::max(),
7550               HB};
7551           PartialStruct.Base = BP;
7552           PartialStruct.LB = LB;
7553           assert(
7554               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7555               "Overlapped elements must be used only once for the variable.");
7556           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7557           // Emit data for non-overlapped data.
7558           OpenMPOffloadMappingFlags Flags =
7559               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7560               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7561                              /*AddPtrFlag=*/false,
7562                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7563           llvm::Value *Size = nullptr;
7564           // Do bitcopy of all non-overlapped structure elements.
7565           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7566                    Component : OverlappedElements) {
7567             Address ComponentLB = Address::invalid();
7568             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7569                  Component) {
7570               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7571                 const auto *FD = dyn_cast<FieldDecl>(VD);
7572                 if (FD && FD->getType()->isLValueReferenceType()) {
7573                   const auto *ME =
7574                       cast<MemberExpr>(MC.getAssociatedExpression());
7575                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7576                   ComponentLB =
7577                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7578                           .getAddress(CGF);
7579                 } else {
7580                   ComponentLB =
7581                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7582                           .getAddress(CGF);
7583                 }
7584                 Size = CGF.Builder.CreatePtrDiff(
7585                     CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7586                 break;
7587               }
7588             }
7589             assert(Size && "Failed to determine structure size");
7590             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7591             CombinedInfo.BasePointers.push_back(BP.getPointer());
7592             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7593             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7594             CombinedInfo.Pointers.push_back(LB.getPointer());
7595             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7596                 Size, CGF.Int64Ty, /*isSigned=*/true));
7597             CombinedInfo.Types.push_back(Flags);
7598             CombinedInfo.Mappers.push_back(nullptr);
7599             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7600                                                                       : 1);
7601             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7602           }
7603           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7604           CombinedInfo.BasePointers.push_back(BP.getPointer());
7605           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7606           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7607           CombinedInfo.Pointers.push_back(LB.getPointer());
7608           Size = CGF.Builder.CreatePtrDiff(
7609               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7610               LB.getPointer());
7611           CombinedInfo.Sizes.push_back(
7612               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7613           CombinedInfo.Types.push_back(Flags);
7614           CombinedInfo.Mappers.push_back(nullptr);
7615           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7616                                                                     : 1);
7617           break;
7618         }
7619         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7620         if (!IsMemberPointerOrAddr ||
7621             (Next == CE && MapType != OMPC_MAP_unknown)) {
7622           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7623           CombinedInfo.BasePointers.push_back(BP.getPointer());
7624           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7625           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7626           CombinedInfo.Pointers.push_back(LB.getPointer());
7627           CombinedInfo.Sizes.push_back(
7628               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7629           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7630                                                                     : 1);
7631 
7632           // If Mapper is valid, the last component inherits the mapper.
7633           bool HasMapper = Mapper && Next == CE;
7634           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7635 
7636           // We need to add a pointer flag for each map that comes from the
7637           // same expression except for the first one. We also need to signal
7638           // this map is the first one that relates with the current capture
7639           // (there is a set of entries for each capture).
7640           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7641               MapType, MapModifiers, MotionModifiers, IsImplicit,
7642               !IsExpressionFirstInfo || RequiresReference ||
7643                   FirstPointerInComplexData || IsMemberReference,
7644               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7645 
7646           if (!IsExpressionFirstInfo || IsMemberReference) {
7647             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7648             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7649             if (IsPointer || (IsMemberReference && Next != CE))
7650               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7651                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7652                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7653                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7654                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7655 
7656             if (ShouldBeMemberOf) {
7657               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7658               // should be later updated with the correct value of MEMBER_OF.
7659               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7660               // From now on, all subsequent PTR_AND_OBJ entries should not be
7661               // marked as MEMBER_OF.
7662               ShouldBeMemberOf = false;
7663             }
7664           }
7665 
7666           CombinedInfo.Types.push_back(Flags);
7667         }
7668 
7669         // If we have encountered a member expression so far, keep track of the
7670         // mapped member. If the parent is "*this", then the value declaration
7671         // is nullptr.
7672         if (EncounteredME) {
7673           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7674           unsigned FieldIndex = FD->getFieldIndex();
7675 
7676           // Update info about the lowest and highest elements for this struct
7677           if (!PartialStruct.Base.isValid()) {
7678             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7679             if (IsFinalArraySection) {
7680               Address HB =
7681                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7682                       .getAddress(CGF);
7683               PartialStruct.HighestElem = {FieldIndex, HB};
7684             } else {
7685               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7686             }
7687             PartialStruct.Base = BP;
7688             PartialStruct.LB = BP;
7689           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7690             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7691           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7692             PartialStruct.HighestElem = {FieldIndex, LowestElem};
7693           }
7694         }
7695 
7696         // Need to emit combined struct for array sections.
7697         if (IsFinalArraySection || IsNonContiguous)
7698           PartialStruct.IsArraySection = true;
7699 
7700         // If we have a final array section, we are done with this expression.
7701         if (IsFinalArraySection)
7702           break;
7703 
7704         // The pointer becomes the base for the next element.
7705         if (Next != CE)
7706           BP = IsMemberReference ? LowestElem : LB;
7707 
7708         IsExpressionFirstInfo = false;
7709         IsCaptureFirstInfo = false;
7710         FirstPointerInComplexData = false;
7711         IsPrevMemberReference = IsMemberReference;
7712       } else if (FirstPointerInComplexData) {
7713         QualType Ty = Components.rbegin()
7714                           ->getAssociatedDeclaration()
7715                           ->getType()
7716                           .getNonReferenceType();
7717         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7718         FirstPointerInComplexData = false;
7719       }
7720     }
7721     // If ran into the whole component - allocate the space for the whole
7722     // record.
7723     if (!EncounteredME)
7724       PartialStruct.HasCompleteRecord = true;
7725 
7726     if (!IsNonContiguous)
7727       return;
7728 
7729     const ASTContext &Context = CGF.getContext();
7730 
7731     // For supporting stride in array section, we need to initialize the first
7732     // dimension size as 1, first offset as 0, and first count as 1
7733     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7734     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7735     MapValuesArrayTy CurStrides;
7736     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7737     uint64_t ElementTypeSize;
7738 
7739     // Collect Size information for each dimension and get the element size as
7740     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7741     // should be [10, 10] and the first stride is 4 btyes.
7742     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7743          Components) {
7744       const Expr *AssocExpr = Component.getAssociatedExpression();
7745       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7746 
7747       if (!OASE)
7748         continue;
7749 
7750       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7751       auto *CAT = Context.getAsConstantArrayType(Ty);
7752       auto *VAT = Context.getAsVariableArrayType(Ty);
7753 
7754       // We need all the dimension size except for the last dimension.
7755       assert((VAT || CAT || &Component == &*Components.begin()) &&
7756              "Should be either ConstantArray or VariableArray if not the "
7757              "first Component");
7758 
7759       // Get element size if CurStrides is empty.
7760       if (CurStrides.empty()) {
7761         const Type *ElementType = nullptr;
7762         if (CAT)
7763           ElementType = CAT->getElementType().getTypePtr();
7764         else if (VAT)
7765           ElementType = VAT->getElementType().getTypePtr();
7766         else
7767           assert(&Component == &*Components.begin() &&
7768                  "Only expect pointer (non CAT or VAT) when this is the "
7769                  "first Component");
7770         // If ElementType is null, then it means the base is a pointer
7771         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7772         // for next iteration.
7773         if (ElementType) {
7774           // For the case that having pointer as base, we need to remove one
7775           // level of indirection.
7776           if (&Component != &*Components.begin())
7777             ElementType = ElementType->getPointeeOrArrayElementType();
7778           ElementTypeSize =
7779               Context.getTypeSizeInChars(ElementType).getQuantity();
7780           CurStrides.push_back(
7781               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7782         }
7783       }
7784       // Get dimension value except for the last dimension since we don't need
7785       // it.
7786       if (DimSizes.size() < Components.size() - 1) {
7787         if (CAT)
7788           DimSizes.push_back(llvm::ConstantInt::get(
7789               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7790         else if (VAT)
7791           DimSizes.push_back(CGF.Builder.CreateIntCast(
7792               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7793               /*IsSigned=*/false));
7794       }
7795     }
7796 
7797     // Skip the dummy dimension since we have already have its information.
7798     auto *DI = DimSizes.begin() + 1;
7799     // Product of dimension.
7800     llvm::Value *DimProd =
7801         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7802 
7803     // Collect info for non-contiguous. Notice that offset, count, and stride
7804     // are only meaningful for array-section, so we insert a null for anything
7805     // other than array-section.
7806     // Also, the size of offset, count, and stride are not the same as
7807     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7808     // count, and stride are the same as the number of non-contiguous
7809     // declaration in target update to/from clause.
7810     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7811          Components) {
7812       const Expr *AssocExpr = Component.getAssociatedExpression();
7813 
7814       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7815         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7816             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7817             /*isSigned=*/false);
7818         CurOffsets.push_back(Offset);
7819         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7820         CurStrides.push_back(CurStrides.back());
7821         continue;
7822       }
7823 
7824       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7825 
7826       if (!OASE)
7827         continue;
7828 
7829       // Offset
7830       const Expr *OffsetExpr = OASE->getLowerBound();
7831       llvm::Value *Offset = nullptr;
7832       if (!OffsetExpr) {
7833         // If offset is absent, then we just set it to zero.
7834         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7835       } else {
7836         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7837                                            CGF.Int64Ty,
7838                                            /*isSigned=*/false);
7839       }
7840       CurOffsets.push_back(Offset);
7841 
7842       // Count
7843       const Expr *CountExpr = OASE->getLength();
7844       llvm::Value *Count = nullptr;
7845       if (!CountExpr) {
7846         // In Clang, once a high dimension is an array section, we construct all
7847         // the lower dimension as array section, however, for case like
7848         // arr[0:2][2], Clang construct the inner dimension as an array section
7849         // but it actually is not in an array section form according to spec.
7850         if (!OASE->getColonLocFirst().isValid() &&
7851             !OASE->getColonLocSecond().isValid()) {
7852           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7853         } else {
7854           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7855           // When the length is absent it defaults to ⌈(size −
7856           // lower-bound)/stride⌉, where size is the size of the array
7857           // dimension.
7858           const Expr *StrideExpr = OASE->getStride();
7859           llvm::Value *Stride =
7860               StrideExpr
7861                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7862                                               CGF.Int64Ty, /*isSigned=*/false)
7863                   : nullptr;
7864           if (Stride)
7865             Count = CGF.Builder.CreateUDiv(
7866                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7867           else
7868             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7869         }
7870       } else {
7871         Count = CGF.EmitScalarExpr(CountExpr);
7872       }
7873       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7874       CurCounts.push_back(Count);
7875 
7876       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7877       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7878       //              Offset      Count     Stride
7879       //    D0          0           1         4    (int)    <- dummy dimension
7880       //    D1          0           2         8    (2 * (1) * 4)
7881       //    D2          1           2         20   (1 * (1 * 5) * 4)
7882       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7883       const Expr *StrideExpr = OASE->getStride();
7884       llvm::Value *Stride =
7885           StrideExpr
7886               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7887                                           CGF.Int64Ty, /*isSigned=*/false)
7888               : nullptr;
7889       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7890       if (Stride)
7891         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7892       else
7893         CurStrides.push_back(DimProd);
7894       if (DI != DimSizes.end())
7895         ++DI;
7896     }
7897 
7898     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7899     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7900     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7901   }
7902 
7903   /// Return the adjusted map modifiers if the declaration a capture refers to
7904   /// appears in a first-private clause. This is expected to be used only with
7905   /// directives that start with 'target'.
7906   OpenMPOffloadMappingFlags
7907   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7908     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7909 
7910     // A first private variable captured by reference will use only the
7911     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7912     // declaration is known as first-private in this handler.
7913     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7914       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7915         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7916                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7917       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7918              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7919     }
7920     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7921     if (I != LambdasMap.end())
7922       // for map(to: lambda): using user specified map type.
7923       return getMapTypeBits(
7924           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7925           /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7926           /*AddPtrFlag=*/false,
7927           /*AddIsTargetParamFlag=*/false,
7928           /*isNonContiguous=*/false);
7929     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7930            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7931   }
7932 
7933   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7934     // Rotate by getFlagMemberOffset() bits.
7935     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7936                                                   << getFlagMemberOffset());
7937   }
7938 
7939   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7940                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7941     // If the entry is PTR_AND_OBJ but has not been marked with the special
7942     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7943     // marked as MEMBER_OF.
7944     if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7945             Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
7946         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7947             (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
7948             OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
7949       return;
7950 
7951     // Reset the placeholder value to prepare the flag for the assignment of the
7952     // proper MEMBER_OF value.
7953     Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7954     Flags |= MemberOfFlag;
7955   }
7956 
7957   void getPlainLayout(const CXXRecordDecl *RD,
7958                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7959                       bool AsBase) const {
7960     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7961 
7962     llvm::StructType *St =
7963         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7964 
7965     unsigned NumElements = St->getNumElements();
7966     llvm::SmallVector<
7967         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7968         RecordLayout(NumElements);
7969 
7970     // Fill bases.
7971     for (const auto &I : RD->bases()) {
7972       if (I.isVirtual())
7973         continue;
7974       const auto *Base = I.getType()->getAsCXXRecordDecl();
7975       // Ignore empty bases.
7976       if (Base->isEmpty() || CGF.getContext()
7977                                  .getASTRecordLayout(Base)
7978                                  .getNonVirtualSize()
7979                                  .isZero())
7980         continue;
7981 
7982       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7983       RecordLayout[FieldIndex] = Base;
7984     }
7985     // Fill in virtual bases.
7986     for (const auto &I : RD->vbases()) {
7987       const auto *Base = I.getType()->getAsCXXRecordDecl();
7988       // Ignore empty bases.
7989       if (Base->isEmpty())
7990         continue;
7991       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7992       if (RecordLayout[FieldIndex])
7993         continue;
7994       RecordLayout[FieldIndex] = Base;
7995     }
7996     // Fill in all the fields.
7997     assert(!RD->isUnion() && "Unexpected union.");
7998     for (const auto *Field : RD->fields()) {
7999       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8000       // will fill in later.)
8001       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8002         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8003         RecordLayout[FieldIndex] = Field;
8004       }
8005     }
8006     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8007              &Data : RecordLayout) {
8008       if (Data.isNull())
8009         continue;
8010       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8011         getPlainLayout(Base, Layout, /*AsBase=*/true);
8012       else
8013         Layout.push_back(Data.get<const FieldDecl *>());
8014     }
8015   }
8016 
8017   /// Generate all the base pointers, section pointers, sizes, map types, and
8018   /// mappers for the extracted mappable expressions (all included in \a
8019   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8020   /// pair of the relevant declaration and index where it occurs is appended to
8021   /// the device pointers info array.
8022   void generateAllInfoForClauses(
8023       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8024       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8025           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8026     // We have to process the component lists that relate with the same
8027     // declaration in a single chunk so that we can generate the map flags
8028     // correctly. Therefore, we organize all lists in a map.
8029     enum MapKind { Present, Allocs, Other, Total };
8030     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8031                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8032         Info;
8033 
8034     // Helper function to fill the information map for the different supported
8035     // clauses.
8036     auto &&InfoGen =
8037         [&Info, &SkipVarSet](
8038             const ValueDecl *D, MapKind Kind,
8039             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8040             OpenMPMapClauseKind MapType,
8041             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8042             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8043             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8044             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8045           if (SkipVarSet.contains(D))
8046             return;
8047           auto It = Info.find(D);
8048           if (It == Info.end())
8049             It = Info
8050                      .insert(std::make_pair(
8051                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8052                      .first;
8053           It->second[Kind].emplace_back(
8054               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8055               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8056         };
8057 
8058     for (const auto *Cl : Clauses) {
8059       const auto *C = dyn_cast<OMPMapClause>(Cl);
8060       if (!C)
8061         continue;
8062       MapKind Kind = Other;
8063       if (llvm::is_contained(C->getMapTypeModifiers(),
8064                              OMPC_MAP_MODIFIER_present))
8065         Kind = Present;
8066       else if (C->getMapType() == OMPC_MAP_alloc)
8067         Kind = Allocs;
8068       const auto *EI = C->getVarRefs().begin();
8069       for (const auto L : C->component_lists()) {
8070         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8071         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8072                 C->getMapTypeModifiers(), std::nullopt,
8073                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8074                 E);
8075         ++EI;
8076       }
8077     }
8078     for (const auto *Cl : Clauses) {
8079       const auto *C = dyn_cast<OMPToClause>(Cl);
8080       if (!C)
8081         continue;
8082       MapKind Kind = Other;
8083       if (llvm::is_contained(C->getMotionModifiers(),
8084                              OMPC_MOTION_MODIFIER_present))
8085         Kind = Present;
8086       const auto *EI = C->getVarRefs().begin();
8087       for (const auto L : C->component_lists()) {
8088         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
8089                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8090                 C->isImplicit(), std::get<2>(L), *EI);
8091         ++EI;
8092       }
8093     }
8094     for (const auto *Cl : Clauses) {
8095       const auto *C = dyn_cast<OMPFromClause>(Cl);
8096       if (!C)
8097         continue;
8098       MapKind Kind = Other;
8099       if (llvm::is_contained(C->getMotionModifiers(),
8100                              OMPC_MOTION_MODIFIER_present))
8101         Kind = Present;
8102       const auto *EI = C->getVarRefs().begin();
8103       for (const auto L : C->component_lists()) {
8104         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
8105                 std::nullopt, C->getMotionModifiers(),
8106                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8107                 *EI);
8108         ++EI;
8109       }
8110     }
8111 
8112     // Look at the use_device_ptr and use_device_addr clauses information and
8113     // mark the existing map entries as such. If there is no map information for
8114     // an entry in the use_device_ptr and use_device_addr list, we create one
8115     // with map type 'alloc' and zero size section. It is the user fault if that
8116     // was not mapped before. If there is no map information and the pointer is
8117     // a struct member, then we defer the emission of that entry until the whole
8118     // struct has been processed.
8119     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8120                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8121         DeferredInfo;
8122     MapCombinedInfoTy UseDeviceDataCombinedInfo;
8123 
8124     auto &&UseDeviceDataCombinedInfoGen =
8125         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8126                                      CodeGenFunction &CGF, bool IsDevAddr) {
8127           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8128           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8129           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8130           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8131               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8132           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8133           UseDeviceDataCombinedInfo.Sizes.push_back(
8134               llvm::Constant::getNullValue(CGF.Int64Ty));
8135           UseDeviceDataCombinedInfo.Types.push_back(
8136               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8137           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8138         };
8139 
8140     auto &&MapInfoGen =
8141         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8142          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8143                    OMPClauseMappableExprCommon::MappableExprComponentListRef
8144                        Components,
8145                    bool IsImplicit, bool IsDevAddr) {
8146           // We didn't find any match in our map information - generate a zero
8147           // size array section - if the pointer is a struct member we defer
8148           // this action until the whole struct has been processed.
8149           if (isa<MemberExpr>(IE)) {
8150             // Insert the pointer into Info to be processed by
8151             // generateInfoForComponentList. Because it is a member pointer
8152             // without a pointee, no entry will be generated for it, therefore
8153             // we need to generate one after the whole struct has been
8154             // processed. Nonetheless, generateInfoForComponentList must be
8155             // called to take the pointer into account for the calculation of
8156             // the range of the partial struct.
8157             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
8158                     std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
8159                     nullptr, nullptr, IsDevAddr);
8160             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8161           } else {
8162             llvm::Value *Ptr;
8163             if (IsDevAddr) {
8164               if (IE->isGLValue())
8165                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8166               else
8167                 Ptr = CGF.EmitScalarExpr(IE);
8168             } else {
8169               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8170             }
8171             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8172           }
8173         };
8174 
8175     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8176                                     const Expr *IE, bool IsDevAddr) -> bool {
8177       // We potentially have map information for this declaration already.
8178       // Look for the first set of components that refer to it. If found,
8179       // return true.
8180       // If the first component is a member expression, we have to look into
8181       // 'this', which maps to null in the map of map information. Otherwise
8182       // look directly for the information.
8183       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8184       if (It != Info.end()) {
8185         bool Found = false;
8186         for (auto &Data : It->second) {
8187           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8188             return MI.Components.back().getAssociatedDeclaration() == VD;
8189           });
8190           // If we found a map entry, signal that the pointer has to be
8191           // returned and move on to the next declaration. Exclude cases where
8192           // the base pointer is mapped as array subscript, array section or
8193           // array shaping. The base address is passed as a pointer to base in
8194           // this case and cannot be used as a base for use_device_ptr list
8195           // item.
8196           if (CI != Data.end()) {
8197             if (IsDevAddr) {
8198               CI->ForDeviceAddr = IsDevAddr;
8199               CI->ReturnDevicePointer = true;
8200               Found = true;
8201               break;
8202             } else {
8203               auto PrevCI = std::next(CI->Components.rbegin());
8204               const auto *VarD = dyn_cast<VarDecl>(VD);
8205               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8206                   isa<MemberExpr>(IE) ||
8207                   !VD->getType().getNonReferenceType()->isPointerType() ||
8208                   PrevCI == CI->Components.rend() ||
8209                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8210                   VarD->hasLocalStorage()) {
8211                 CI->ForDeviceAddr = IsDevAddr;
8212                 CI->ReturnDevicePointer = true;
8213                 Found = true;
8214                 break;
8215               }
8216             }
8217           }
8218         }
8219         return Found;
8220       }
8221       return false;
8222     };
8223 
8224     // Look at the use_device_ptr clause information and mark the existing map
8225     // entries as such. If there is no map information for an entry in the
8226     // use_device_ptr list, we create one with map type 'alloc' and zero size
8227     // section. It is the user fault if that was not mapped before. If there is
8228     // no map information and the pointer is a struct member, then we defer the
8229     // emission of that entry until the whole struct has been processed.
8230     for (const auto *Cl : Clauses) {
8231       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8232       if (!C)
8233         continue;
8234       for (const auto L : C->component_lists()) {
8235         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8236             std::get<1>(L);
8237         assert(!Components.empty() &&
8238                "Not expecting empty list of components!");
8239         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8240         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8241         const Expr *IE = Components.back().getAssociatedExpression();
8242         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8243           continue;
8244         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8245                    /*IsDevAddr=*/false);
8246       }
8247     }
8248 
8249     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8250     for (const auto *Cl : Clauses) {
8251       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8252       if (!C)
8253         continue;
8254       for (const auto L : C->component_lists()) {
8255         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8256             std::get<1>(L);
8257         assert(!std::get<1>(L).empty() &&
8258                "Not expecting empty list of components!");
8259         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8260         if (!Processed.insert(VD).second)
8261           continue;
8262         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8263         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8264         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8265           continue;
8266         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8267                    /*IsDevAddr=*/true);
8268       }
8269     }
8270 
8271     for (const auto &Data : Info) {
8272       StructRangeInfoTy PartialStruct;
8273       // Temporary generated information.
8274       MapCombinedInfoTy CurInfo;
8275       const Decl *D = Data.first;
8276       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8277       for (const auto &M : Data.second) {
8278         for (const MapInfo &L : M) {
8279           assert(!L.Components.empty() &&
8280                  "Not expecting declaration with no component lists.");
8281 
8282           // Remember the current base pointer index.
8283           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8284           CurInfo.NonContigInfo.IsNonContiguous =
8285               L.Components.back().isNonContiguous();
8286           generateInfoForComponentList(
8287               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8288               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8289               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8290 
8291           // If this entry relates with a device pointer, set the relevant
8292           // declaration and add the 'return pointer' flag.
8293           if (L.ReturnDevicePointer) {
8294             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8295                    "Unexpected number of mapped base pointers.");
8296 
8297             const ValueDecl *RelevantVD =
8298                 L.Components.back().getAssociatedDeclaration();
8299             assert(RelevantVD &&
8300                    "No relevant declaration related with device pointer??");
8301 
8302             CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8303             CurInfo.DevicePointers[CurrentBasePointersIdx] =
8304                 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer;
8305             CurInfo.Types[CurrentBasePointersIdx] |=
8306                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8307           }
8308         }
8309       }
8310 
8311       // Append any pending zero-length pointers which are struct members and
8312       // used with use_device_ptr or use_device_addr.
8313       auto CI = DeferredInfo.find(Data.first);
8314       if (CI != DeferredInfo.end()) {
8315         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8316           llvm::Value *BasePtr;
8317           llvm::Value *Ptr;
8318           if (L.ForDeviceAddr) {
8319             if (L.IE->isGLValue())
8320               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8321             else
8322               Ptr = this->CGF.EmitScalarExpr(L.IE);
8323             BasePtr = Ptr;
8324             // Entry is RETURN_PARAM. Also, set the placeholder value
8325             // MEMBER_OF=FFFF so that the entry is later updated with the
8326             // correct value of MEMBER_OF.
8327             CurInfo.Types.push_back(
8328                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8329                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8330           } else {
8331             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8332             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8333                                              L.IE->getExprLoc());
8334             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8335             // placeholder value MEMBER_OF=FFFF so that the entry is later
8336             // updated with the correct value of MEMBER_OF.
8337             CurInfo.Types.push_back(
8338                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8339                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8340                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8341           }
8342           CurInfo.Exprs.push_back(L.VD);
8343           CurInfo.BasePointers.emplace_back(BasePtr);
8344           CurInfo.DevicePtrDecls.emplace_back(L.VD);
8345           CurInfo.DevicePointers.emplace_back(
8346               L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8347           CurInfo.Pointers.push_back(Ptr);
8348           CurInfo.Sizes.push_back(
8349               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8350           CurInfo.Mappers.push_back(nullptr);
8351         }
8352       }
8353       // If there is an entry in PartialStruct it means we have a struct with
8354       // individual members mapped. Emit an extra combined entry.
8355       if (PartialStruct.Base.isValid()) {
8356         CurInfo.NonContigInfo.Dims.push_back(0);
8357         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
8358                           /*IsMapThis*/ !VD, VD);
8359       }
8360 
8361       // We need to append the results of this capture to what we already
8362       // have.
8363       CombinedInfo.append(CurInfo);
8364     }
8365     // Append data for use_device_ptr clauses.
8366     CombinedInfo.append(UseDeviceDataCombinedInfo);
8367   }
8368 
8369 public:
8370   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8371       : CurDir(&Dir), CGF(CGF) {
8372     // Extract firstprivate clause information.
8373     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8374       for (const auto *D : C->varlists())
8375         FirstPrivateDecls.try_emplace(
8376             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8377     // Extract implicit firstprivates from uses_allocators clauses.
8378     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8379       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8380         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8381         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8382           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8383                                         /*Implicit=*/true);
8384         else if (const auto *VD = dyn_cast<VarDecl>(
8385                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8386                          ->getDecl()))
8387           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8388       }
8389     }
8390     // Extract device pointer clause information.
8391     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8392       for (auto L : C->component_lists())
8393         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8394     // Extract device addr clause information.
8395     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8396       for (auto L : C->component_lists())
8397         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8398     // Extract map information.
8399     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8400       if (C->getMapType() != OMPC_MAP_to)
8401         continue;
8402       for (auto L : C->component_lists()) {
8403         const ValueDecl *VD = std::get<0>(L);
8404         const auto *RD = VD ? VD->getType()
8405                                   .getCanonicalType()
8406                                   .getNonReferenceType()
8407                                   ->getAsCXXRecordDecl()
8408                             : nullptr;
8409         if (RD && RD->isLambda())
8410           LambdasMap.try_emplace(std::get<0>(L), C);
8411       }
8412     }
8413   }
8414 
8415   /// Constructor for the declare mapper directive.
8416   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8417       : CurDir(&Dir), CGF(CGF) {}
8418 
8419   /// Generate code for the combined entry if we have a partially mapped struct
8420   /// and take care of the mapping flags of the arguments corresponding to
8421   /// individual struct members.
8422   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8423                          MapFlagsArrayTy &CurTypes,
8424                          const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8425                          const ValueDecl *VD = nullptr,
8426                          bool NotTargetParams = true) const {
8427     if (CurTypes.size() == 1 &&
8428         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8429          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8430         !PartialStruct.IsArraySection)
8431       return;
8432     Address LBAddr = PartialStruct.LowestElem.second;
8433     Address HBAddr = PartialStruct.HighestElem.second;
8434     if (PartialStruct.HasCompleteRecord) {
8435       LBAddr = PartialStruct.LB;
8436       HBAddr = PartialStruct.LB;
8437     }
8438     CombinedInfo.Exprs.push_back(VD);
8439     // Base is the base of the struct
8440     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8441     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8442     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8443     // Pointer is the address of the lowest element
8444     llvm::Value *LB = LBAddr.getPointer();
8445     const CXXMethodDecl *MD =
8446         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8447     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8448     bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8449     // There should not be a mapper for a combined entry.
8450     if (HasBaseClass) {
8451       // OpenMP 5.2 148:21:
8452       // If the target construct is within a class non-static member function,
8453       // and a variable is an accessible data member of the object for which the
8454       // non-static data member function is invoked, the variable is treated as
8455       // if the this[:1] expression had appeared in a map clause with a map-type
8456       // of tofrom.
8457       // Emit this[:1]
8458       CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8459       QualType Ty = MD->getThisType()->getPointeeType();
8460       llvm::Value *Size =
8461           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8462                                     /*isSigned=*/true);
8463       CombinedInfo.Sizes.push_back(Size);
8464     } else {
8465       CombinedInfo.Pointers.push_back(LB);
8466       // Size is (addr of {highest+1} element) - (addr of lowest element)
8467       llvm::Value *HB = HBAddr.getPointer();
8468       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8469           HBAddr.getElementType(), HB, /*Idx0=*/1);
8470       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8471       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8472       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8473       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8474                                                     /*isSigned=*/false);
8475       CombinedInfo.Sizes.push_back(Size);
8476     }
8477     CombinedInfo.Mappers.push_back(nullptr);
8478     // Map type is always TARGET_PARAM, if generate info for captures.
8479     CombinedInfo.Types.push_back(
8480         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8481                         : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8482     // If any element has the present modifier, then make sure the runtime
8483     // doesn't attempt to allocate the struct.
8484     if (CurTypes.end() !=
8485         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8486           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8487               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8488         }))
8489       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8490     // Remove TARGET_PARAM flag from the first element
8491     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8492     // If any element has the ompx_hold modifier, then make sure the runtime
8493     // uses the hold reference count for the struct as a whole so that it won't
8494     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8495     // elements as well so the runtime knows which reference count to check
8496     // when determining whether it's time for device-to-host transfers of
8497     // individual elements.
8498     if (CurTypes.end() !=
8499         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8500           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8501               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8502         })) {
8503       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8504       for (auto &M : CurTypes)
8505         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8506     }
8507 
8508     // All other current entries will be MEMBER_OF the combined entry
8509     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8510     // 0xFFFF in the MEMBER_OF field).
8511     OpenMPOffloadMappingFlags MemberOfFlag =
8512         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8513     for (auto &M : CurTypes)
8514       setCorrectMemberOfFlag(M, MemberOfFlag);
8515   }
8516 
8517   /// Generate all the base pointers, section pointers, sizes, map types, and
8518   /// mappers for the extracted mappable expressions (all included in \a
8519   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8520   /// pair of the relevant declaration and index where it occurs is appended to
8521   /// the device pointers info array.
8522   void generateAllInfo(
8523       MapCombinedInfoTy &CombinedInfo,
8524       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8525           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8526     assert(CurDir.is<const OMPExecutableDirective *>() &&
8527            "Expect a executable directive");
8528     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8529     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8530   }
8531 
8532   /// Generate all the base pointers, section pointers, sizes, map types, and
8533   /// mappers for the extracted map clauses of user-defined mapper (all included
8534   /// in \a CombinedInfo).
8535   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8536     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8537            "Expect a declare mapper directive");
8538     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8539     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8540   }
8541 
8542   /// Emit capture info for lambdas for variables captured by reference.
8543   void generateInfoForLambdaCaptures(
8544       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8545       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8546     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8547     const auto *RD = VDType->getAsCXXRecordDecl();
8548     if (!RD || !RD->isLambda())
8549       return;
8550     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8551                    CGF.getContext().getDeclAlign(VD));
8552     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8553     llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8554     FieldDecl *ThisCapture = nullptr;
8555     RD->getCaptureFields(Captures, ThisCapture);
8556     if (ThisCapture) {
8557       LValue ThisLVal =
8558           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8559       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8560       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8561                                  VDLVal.getPointer(CGF));
8562       CombinedInfo.Exprs.push_back(VD);
8563       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8564       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8565       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8566       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8567       CombinedInfo.Sizes.push_back(
8568           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8569                                     CGF.Int64Ty, /*isSigned=*/true));
8570       CombinedInfo.Types.push_back(
8571           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8572           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8573           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8574           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8575       CombinedInfo.Mappers.push_back(nullptr);
8576     }
8577     for (const LambdaCapture &LC : RD->captures()) {
8578       if (!LC.capturesVariable())
8579         continue;
8580       const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8581       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8582         continue;
8583       auto It = Captures.find(VD);
8584       assert(It != Captures.end() && "Found lambda capture without field.");
8585       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8586       if (LC.getCaptureKind() == LCK_ByRef) {
8587         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8588         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8589                                    VDLVal.getPointer(CGF));
8590         CombinedInfo.Exprs.push_back(VD);
8591         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8592         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8593         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8594         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8595         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8596             CGF.getTypeSize(
8597                 VD->getType().getCanonicalType().getNonReferenceType()),
8598             CGF.Int64Ty, /*isSigned=*/true));
8599       } else {
8600         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8601         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8602                                    VDLVal.getPointer(CGF));
8603         CombinedInfo.Exprs.push_back(VD);
8604         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8605         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8606         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8607         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8608         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8609       }
8610       CombinedInfo.Types.push_back(
8611           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8612           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8613           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8614           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8615       CombinedInfo.Mappers.push_back(nullptr);
8616     }
8617   }
8618 
8619   /// Set correct indices for lambdas captures.
8620   void adjustMemberOfForLambdaCaptures(
8621       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8622       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8623       MapFlagsArrayTy &Types) const {
8624     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8625       // Set correct member_of idx for all implicit lambda captures.
8626       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8627                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8628                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8629                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8630         continue;
8631       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8632       assert(BasePtr && "Unable to find base lambda address.");
8633       int TgtIdx = -1;
8634       for (unsigned J = I; J > 0; --J) {
8635         unsigned Idx = J - 1;
8636         if (Pointers[Idx] != BasePtr)
8637           continue;
8638         TgtIdx = Idx;
8639         break;
8640       }
8641       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8642       // All other current entries will be MEMBER_OF the combined entry
8643       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8644       // 0xFFFF in the MEMBER_OF field).
8645       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8646       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8647     }
8648   }
8649 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap The capture to process; must not capture a variable array
  /// type.
  /// \param Arg Value used as both base pointer and section pointer when the
  /// captured declaration is passed as a device pointer/address.
  /// \param CombinedInfo Receives the generated map entries.
  /// \param PartialStruct Forwarded to generateInfoForComponentList for every
  /// component list that is processed.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable, or null when the capture
    // is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip here, it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause (or is recorded
    // in HasDevAddrsMap) we just have to pass the pointer by value. If it is a
    // reference to a declaration, we just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Tuple layout: <component list, map type, map modifiers, is-implicit,
    // mapper declaration, originating expression>.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    // Likewise for the lists recorded in HasDevAddrsMap, but mapped 'tofrom'.
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list that the directive's map clauses associate
    // with VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI walks the clause's variable references in step with the component
      // lists visited below.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Reorder the lists while keeping the relative order of equivalent ones.
    // Note the crossed operands: the 'present' test reads each side's own
    // modifiers, whereas the 'alloc' test reads the map type of the opposite
    // side. The comparator therefore places LHS first when LHS has 'present'
    // and RHS does not, or when RHS is 'alloc' and LHS is not.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base entry inside
    // DeclComponentLists, which is not modified from here on.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Compare L only against the lists that follow it, so each unordered
      // pair is examined once.
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is used below; the remaining fields are unpacked
        // into the outer locals, which are not read again in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists in reverse, in lockstep, until they diverge or one
        // of them is exhausted.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points at the first unmatched component of the longer list.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The list that was exhausted is the base; the longer list is
          // recorded as overlapping with it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels from the declared type until the innermost
      // element type is reached.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      // Collect the record's fields into Layout; it is used below to order
      // fields that belong to different parents.
      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common leading part of both lists (reverse iteration).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field: order by field index when the
            // fields share a parent, otherwise by which field comes first in
            // Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists that served as a base above have already been emitted.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared after the first visited list even if that list was skipped.
      IsFirstComponentList = false;
    }
  }
8904 
8905   /// Generate the default map information for a given capture \a CI,
8906   /// record field declaration \a RI and captured value \a CV.
8907   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8908                               const FieldDecl &RI, llvm::Value *CV,
8909                               MapCombinedInfoTy &CombinedInfo) const {
8910     bool IsImplicit = true;
8911     // Do the default mapping.
8912     if (CI.capturesThis()) {
8913       CombinedInfo.Exprs.push_back(nullptr);
8914       CombinedInfo.BasePointers.push_back(CV);
8915       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8916       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8917       CombinedInfo.Pointers.push_back(CV);
8918       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8919       CombinedInfo.Sizes.push_back(
8920           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8921                                     CGF.Int64Ty, /*isSigned=*/true));
8922       // Default map type.
8923       CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8924                                    OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8925     } else if (CI.capturesVariableByCopy()) {
8926       const VarDecl *VD = CI.getCapturedVar();
8927       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8928       CombinedInfo.BasePointers.push_back(CV);
8929       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8930       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8931       CombinedInfo.Pointers.push_back(CV);
8932       if (!RI.getType()->isAnyPointerType()) {
8933         // We have to signal to the runtime captures passed by value that are
8934         // not pointers.
8935         CombinedInfo.Types.push_back(
8936             OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8937         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8938             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8939       } else {
8940         // Pointers are implicitly mapped with a zero size and no flags
8941         // (other than first map that is added for all implicit maps).
8942         CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8943         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8944       }
8945       auto I = FirstPrivateDecls.find(VD);
8946       if (I != FirstPrivateDecls.end())
8947         IsImplicit = I->getSecond();
8948     } else {
8949       assert(CI.capturesVariable() && "Expected captured reference.");
8950       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8951       QualType ElementType = PtrTy->getPointeeType();
8952       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8953           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8954       // The default map type for a scalar/complex type is 'to' because by
8955       // default the value doesn't have to be retrieved. For an aggregate
8956       // type, the default is 'tofrom'.
8957       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8958       const VarDecl *VD = CI.getCapturedVar();
8959       auto I = FirstPrivateDecls.find(VD);
8960       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8961       CombinedInfo.BasePointers.push_back(CV);
8962       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8963       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8964       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8965         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8966             CV, ElementType, CGF.getContext().getDeclAlign(VD),
8967             AlignmentSource::Decl));
8968         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8969       } else {
8970         CombinedInfo.Pointers.push_back(CV);
8971       }
8972       if (I != FirstPrivateDecls.end())
8973         IsImplicit = I->getSecond();
8974     }
8975     // Every default map produces a single argument which is a target parameter.
8976     CombinedInfo.Types.back() |=
8977         OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8978 
8979     // Add flag stating this is an implicit map.
8980     if (IsImplicit)
8981       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8982 
8983     // No user-defined mapper for default mapping.
8984     CombinedInfo.Mappers.push_back(nullptr);
8985   }
8986 };
8987 } // anonymous namespace
8988 
8989 // Try to extract the base declaration from a `this->x` expression if possible.
8990 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8991   if (!E)
8992     return nullptr;
8993 
8994   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8995     if (const MemberExpr *ME =
8996             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8997       return ME->getMemberDecl();
8998   return nullptr;
8999 }
9000 
9001 /// Emit a string constant containing the names of the values mapped to the
9002 /// offloading runtime library.
9003 llvm::Constant *
9004 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9005                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9006 
9007   uint32_t SrcLocStrSize;
9008   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9009     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9010 
9011   SourceLocation Loc;
9012   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9013     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9014       Loc = VD->getLocation();
9015     else
9016       Loc = MapExprs.getMapExpr()->getExprLoc();
9017   } else {
9018     Loc = MapExprs.getMapDecl()->getLocation();
9019   }
9020 
9021   std::string ExprName;
9022   if (MapExprs.getMapExpr()) {
9023     PrintingPolicy P(CGF.getContext().getLangOpts());
9024     llvm::raw_string_ostream OS(ExprName);
9025     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9026     OS.flush();
9027   } else {
9028     ExprName = MapExprs.getMapDecl()->getNameAsString();
9029   }
9030 
9031   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9032   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9033                                          PLoc.getLine(), PLoc.getColumn(),
9034                                          SrcLocStrSize);
9035 }
9036 
9037 /// Emit the arrays used to pass the captures and map information to the
9038 /// offloading runtime library. If there is no map or capture information,
9039 /// return nullptr by reference.
9040 static void emitOffloadingArrays(
9041     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9042     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9043     bool IsNonContiguous = false) {
9044   CodeGenModule &CGM = CGF.CGM;
9045 
9046   // Reset the array information.
9047   Info.clearArrayInfo();
9048   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9049 
9050   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9051   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9052                          CGF.AllocaInsertPt->getIterator());
9053   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9054                           CGF.Builder.GetInsertPoint());
9055 
9056   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9057     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9058   };
9059   if (CGM.getCodeGenOpts().getDebugInfo() !=
9060       llvm::codegenoptions::NoDebugInfo) {
9061     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9062     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9063                     FillInfoMap);
9064   }
9065 
9066   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9067     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9068       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9069     }
9070   };
9071 
9072   auto CustomMapperCB = [&](unsigned int I) {
9073     llvm::Value *MFunc = nullptr;
9074     if (CombinedInfo.Mappers[I]) {
9075       Info.HasMapper = true;
9076       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9077           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9078     }
9079     return MFunc;
9080   };
9081   OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
9082                                   /*IsNonContiguous=*/true, DeviceAddrCB,
9083                                   CustomMapperCB);
9084 }
9085 
9086 /// Check for inner distribute directive.
9087 static const OMPExecutableDirective *
9088 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9089   const auto *CS = D.getInnermostCapturedStmt();
9090   const auto *Body =
9091       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9092   const Stmt *ChildStmt =
9093       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9094 
9095   if (const auto *NestedDir =
9096           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9097     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9098     switch (D.getDirectiveKind()) {
9099     case OMPD_target:
9100       // For now, just treat 'target teams loop' as if it's distributed.
9101       if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9102         return NestedDir;
9103       if (DKind == OMPD_teams) {
9104         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9105             /*IgnoreCaptured=*/true);
9106         if (!Body)
9107           return nullptr;
9108         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9109         if (const auto *NND =
9110                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9111           DKind = NND->getDirectiveKind();
9112           if (isOpenMPDistributeDirective(DKind))
9113             return NND;
9114         }
9115       }
9116       return nullptr;
9117     case OMPD_target_teams:
9118       if (isOpenMPDistributeDirective(DKind))
9119         return NestedDir;
9120       return nullptr;
9121     case OMPD_target_parallel:
9122     case OMPD_target_simd:
9123     case OMPD_target_parallel_for:
9124     case OMPD_target_parallel_for_simd:
9125       return nullptr;
9126     case OMPD_target_teams_distribute:
9127     case OMPD_target_teams_distribute_simd:
9128     case OMPD_target_teams_distribute_parallel_for:
9129     case OMPD_target_teams_distribute_parallel_for_simd:
9130     case OMPD_parallel:
9131     case OMPD_for:
9132     case OMPD_parallel_for:
9133     case OMPD_parallel_master:
9134     case OMPD_parallel_sections:
9135     case OMPD_for_simd:
9136     case OMPD_parallel_for_simd:
9137     case OMPD_cancel:
9138     case OMPD_cancellation_point:
9139     case OMPD_ordered:
9140     case OMPD_threadprivate:
9141     case OMPD_allocate:
9142     case OMPD_task:
9143     case OMPD_simd:
9144     case OMPD_tile:
9145     case OMPD_unroll:
9146     case OMPD_sections:
9147     case OMPD_section:
9148     case OMPD_single:
9149     case OMPD_master:
9150     case OMPD_critical:
9151     case OMPD_taskyield:
9152     case OMPD_barrier:
9153     case OMPD_taskwait:
9154     case OMPD_taskgroup:
9155     case OMPD_atomic:
9156     case OMPD_flush:
9157     case OMPD_depobj:
9158     case OMPD_scan:
9159     case OMPD_teams:
9160     case OMPD_target_data:
9161     case OMPD_target_exit_data:
9162     case OMPD_target_enter_data:
9163     case OMPD_distribute:
9164     case OMPD_distribute_simd:
9165     case OMPD_distribute_parallel_for:
9166     case OMPD_distribute_parallel_for_simd:
9167     case OMPD_teams_distribute:
9168     case OMPD_teams_distribute_simd:
9169     case OMPD_teams_distribute_parallel_for:
9170     case OMPD_teams_distribute_parallel_for_simd:
9171     case OMPD_target_update:
9172     case OMPD_declare_simd:
9173     case OMPD_declare_variant:
9174     case OMPD_begin_declare_variant:
9175     case OMPD_end_declare_variant:
9176     case OMPD_declare_target:
9177     case OMPD_end_declare_target:
9178     case OMPD_declare_reduction:
9179     case OMPD_declare_mapper:
9180     case OMPD_taskloop:
9181     case OMPD_taskloop_simd:
9182     case OMPD_master_taskloop:
9183     case OMPD_master_taskloop_simd:
9184     case OMPD_parallel_master_taskloop:
9185     case OMPD_parallel_master_taskloop_simd:
9186     case OMPD_requires:
9187     case OMPD_metadirective:
9188     case OMPD_unknown:
9189     default:
9190       llvm_unreachable("Unexpected directive.");
9191     }
9192   }
9193 
9194   return nullptr;
9195 }
9196 
9197 /// Emit the user-defined mapper function. The code generation follows the
9198 /// pattern in the example below.
9199 /// \code
9200 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9201 ///                                           void *base, void *begin,
9202 ///                                           int64_t size, int64_t type,
9203 ///                                           void *name = nullptr) {
9204 ///   // Allocate space for an array section first or add a base/begin for
9205 ///   // pointer dereference.
9206 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9207 ///       !maptype.IsDelete)
9208 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9209 ///                                 size*sizeof(Ty), clearToFromMember(type));
9210 ///   // Map members.
9211 ///   for (unsigned i = 0; i < size; i++) {
9212 ///     // For each component specified by this mapper:
9213 ///     for (auto c : begin[i]->all_components) {
9214 ///       if (c.hasMapper())
9215 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9216 ///                       c.arg_type, c.arg_name);
9217 ///       else
9218 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9219 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9220 ///                                     c.arg_name);
9221 ///     }
9222 ///   }
9223 ///   // Delete the array section.
9224 ///   if (size > 1 && maptype.IsDelete)
9225 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9226 ///                                 size*sizeof(Ty), clearToFromMember(type));
9227 /// }
9228 /// \endcode
9229 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9230                                             CodeGenFunction *CGF) {
       // Each declare-mapper declaration gets at most one mapper function; the
       // function is recorded in UDMMap at the end of this routine.
9231   if (UDMMap.count(D) > 0)
9232     return;
9233   ASTContext &C = CGM.getContext();
9234   QualType Ty = D->getType();
9235   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9236   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9237   auto *MapperVarDecl =
9238       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9239   SourceLocation Loc = D->getLocation();
9240   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9241   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9242 
9243   // Prepare mapper function arguments and attributes.
       // The parameters are, in order: handle, base, begin, size, type, name.
9244   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9245                               C.VoidPtrTy, ImplicitParamDecl::Other);
9246   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9247                             ImplicitParamDecl::Other);
9248   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9249                              C.VoidPtrTy, ImplicitParamDecl::Other);
9250   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9251                             ImplicitParamDecl::Other);
9252   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9253                             ImplicitParamDecl::Other);
9254   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9255                             ImplicitParamDecl::Other);
9256   FunctionArgList Args;
9257   Args.push_back(&HandleArg);
9258   Args.push_back(&BaseArg);
9259   Args.push_back(&BeginArg);
9260   Args.push_back(&SizeArg);
9261   Args.push_back(&TypeArg);
9262   Args.push_back(&NameArg);
9263   const CGFunctionInfo &FnInfo =
9264       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9265   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
       // The function name is built from "omp_mapper", the mangled name of the
       // mapped type and the name of the mapper declaration.
9266   SmallString<64> TyStr;
9267   llvm::raw_svector_ostream Out(TyStr);
9268   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9269   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9270   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9271                                     Name, &CGM.getModule());
9272   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
       // Make sure the mapper function does not carry the optnone attribute.
9273   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9274   // Start the mapper function code generation.
9275   CodeGenFunction MapperCGF(CGM);
9276   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9277   // Compute the starting and end addresses of array elements.
9278   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9279       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9280       C.getPointerType(Int64Ty), Loc);
9281   // Prepare common arguments for array initialization and deletion.
9282   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9283       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9284       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9285   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9286       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9287       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9288   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9289       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9290       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9291   // Convert the size in bytes into the number of array elements.
       // The division is emitted as exact, i.e. the incoming byte size is
       // expected to be a multiple of the element size.
9292   Size = MapperCGF.Builder.CreateExactUDiv(
9293       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9294   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9295       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9296   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9297   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9298       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9299       C.getPointerType(Int64Ty), Loc);
9300   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9301       MapperCGF.GetAddrOfLocalVar(&NameArg),
9302       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9303 
9304   // Emit array initialization if this is an array section and \p MapType
9305   // indicates that memory allocation is required.
9306   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9307   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9308                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9309 
9310   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9311 
9312   // Emit the loop header block.
9313   MapperCGF.EmitBlock(HeadBB);
9314   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9315   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9316   // Evaluate whether the initial condition is satisfied.
9317   llvm::Value *IsEmpty =
9318       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9319   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
       // The block that ends with the branch above; it supplies the PHI's value
       // for the first loop iteration.
9320   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9321 
9322   // Emit the loop body block.
9323   MapperCGF.EmitBlock(BodyBB);
       // LastBB tracks the block from which the back-edge to BodyBB is taken; it
       // is updated each time the per-component code below splits the block.
9324   llvm::BasicBlock *LastBB = BodyBB;
9325   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9326       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9327   PtrPHI->addIncoming(PtrBegin, EntryBB);
9328   Address PtrCurrent(PtrPHI, ElemTy,
9329                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9330                          .getAlignment()
9331                          .alignmentOfArrayElement(ElementSize));
9332   // Privatize the declared variable of mapper to be the current array element.
9333   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9334   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9335   (void)Scope.Privatize();
9336 
9337   // Get map clause information. Fill up the arrays with all mapped variables.
9338   MappableExprsHandler::MapCombinedInfoTy Info;
9339   MappableExprsHandler MEHandler(*D, MapperCGF);
9340   MEHandler.generateAllInfoForMapper(Info);
9341 
9342   // Call the runtime API __tgt_mapper_num_components to get the number of
9343   // pre-existing components.
9344   llvm::Value *OffloadingArgs[] = {Handle};
9345   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9346       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9347                                             OMPRTL___tgt_mapper_num_components),
9348       OffloadingArgs);
       // Shift the component count by the member-flag offset so that it can be
       // added to the map type of every component pushed below.
9349   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9350       PreviousSize,
9351       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9352 
9353   // Fill up the runtime mapper handle for all components.
9354   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9355     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9356         Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9357     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9358         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9359     llvm::Value *CurSizeArg = Info.Sizes[I];
         // A mapping name is only emitted when debug info is enabled; otherwise a
         // null pointer is passed.
9360     llvm::Value *CurNameArg =
9361         (CGM.getCodeGenOpts().getDebugInfo() ==
9362          llvm::codegenoptions::NoDebugInfo)
9363             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9364             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9365 
9366     // Extract the MEMBER_OF field from the map type.
9367     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9368         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9369             Info.Types[I]));
9370     llvm::Value *MemberMapType =
9371         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9372 
9373     // Combine the map type inherited from user-defined mapper with that
9374     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9375     // bits of the \a MapType, which is the input argument of the mapper
9376     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9377     // bits of MemberMapType.
9378     // [OpenMP 5.0], 1.2.6. map-type decay.
9379     //        | alloc |  to   | from  | tofrom | release | delete
9380     // ----------------------------------------------------------
9381     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9382     // to     | alloc |  to   | alloc |   to   | release | delete
9383     // from   | alloc | alloc | from  |  from  | release | delete
9384     // tofrom | alloc |  to   | from  | tofrom | release | delete
9385     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9386         MapType,
9387         MapperCGF.Builder.getInt64(
9388             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9389                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9390                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9391     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9392     llvm::BasicBlock *AllocElseBB =
9393         MapperCGF.createBasicBlock("omp.type.alloc.else");
9394     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9395     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9396     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9397     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9398     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9399     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9400     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9401     MapperCGF.EmitBlock(AllocBB);
9402     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9403         MemberMapType,
9404         MapperCGF.Builder.getInt64(
9405             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9406                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9407                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9408     MapperCGF.Builder.CreateBr(EndBB);
9409     MapperCGF.EmitBlock(AllocElseBB);
9410     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9411         LeftToFrom,
9412         MapperCGF.Builder.getInt64(
9413             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9414                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9415     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9416     // In case of to, clear OMP_MAP_FROM.
9417     MapperCGF.EmitBlock(ToBB);
9418     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9419         MemberMapType,
9420         MapperCGF.Builder.getInt64(
9421             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9422                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9423     MapperCGF.Builder.CreateBr(EndBB);
9424     MapperCGF.EmitBlock(ToElseBB);
9425     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9426         LeftToFrom,
9427         MapperCGF.Builder.getInt64(
9428             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9429                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9430     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9431     // In case of from, clear OMP_MAP_TO.
9432     MapperCGF.EmitBlock(FromBB);
9433     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9434         MemberMapType,
9435         MapperCGF.Builder.getInt64(
9436             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9437                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9438     // In case of tofrom, do nothing.
9439     MapperCGF.EmitBlock(EndBB);
9440     LastBB = EndBB;
         // Merge the four possible map types. ToElseBB is the incoming block of
         // the tofrom case, which passes MemberMapType through unchanged.
9441     llvm::PHINode *CurMapType =
9442         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9443     CurMapType->addIncoming(AllocMapType, AllocBB);
9444     CurMapType->addIncoming(ToMapType, ToBB);
9445     CurMapType->addIncoming(FromMapType, FromBB);
9446     CurMapType->addIncoming(MemberMapType, ToElseBB);
9447 
         // Note: this array shadows the OffloadingArgs declared before the loop.
9448     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9449                                      CurSizeArg, CurMapType, CurNameArg};
9450     if (Info.Mappers[I]) {
9451       // Call the corresponding mapper function.
9452       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9453           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9454       assert(MapperFunc && "Expect a valid mapper function is available.");
9455       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9456     } else {
9457       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9458       // data structure.
9459       MapperCGF.EmitRuntimeCall(
9460           OMPBuilder.getOrCreateRuntimeFunction(
9461               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9462           OffloadingArgs);
9463     }
9464   }
9465 
9466   // Update the pointer to point to the next element that needs to be mapped,
9467   // and check whether we have mapped all elements.
9468   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9469       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9470   PtrPHI->addIncoming(PtrNext, LastBB);
9471   llvm::Value *IsDone =
9472       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9473   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9474   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9475 
9476   MapperCGF.EmitBlock(ExitBB);
9477   // Emit array deletion if this is an array section and \p MapType indicates
9478   // that deletion is required.
9479   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9480                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9481 
9482   // Emit the function exit block.
9483   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9484   MapperCGF.FinishFunction();
       // Record the mapper function and, when a CodeGenFunction was supplied,
       // associate the declaration with the function currently being emitted.
9485   UDMMap.try_emplace(D, Fn);
9486   if (CGF) {
9487     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9488     Decls.second.push_back(D);
9489   }
9490 }
9491 
9492 /// Emit the array initialization or deletion portion for user-defined mapper
9493 /// code generation. First, it evaluates whether an array section is mapped and
9494 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9495 /// true, and \a MapType indicates to not delete this array, array
9496 /// initialization code is generated. If \a IsInit is false, and \a MapType
9497 /// indicates to delete this array, array deletion code is generated.
9498 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9499     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9500     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9501     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9502     bool IsInit) {
       // The prefix distinguishes the names of the blocks and values emitted for
       // the initialization and the deletion variant.
9503   StringRef Prefix = IsInit ? ".init" : ".del";
9504 
9505   // Evaluate if this is an array section.
9506   llvm::BasicBlock *BodyBB =
9507       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
       // More than one element (signed compare). Note that the value is named
       // "omp.arrayinit.isarray" for both the init and the delete variant.
9508   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9509       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9510   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9511       MapType,
9512       MapperCGF.Builder.getInt64(
9513           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9514               OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9515   llvm::Value *DeleteCond;
9516   llvm::Value *Cond;
9517   if (IsInit) {
9518     // base != begin?
         // NOTE: despite its name, BaseIsBegin is true when Base and Begin differ.
9519     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9520     // IsPtrAndObj?
9521     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9522         MapType,
9523         MapperCGF.Builder.getInt64(
9524             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9525                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9526     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9527     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
         // Initialization applies to arrays and to PTR_AND_OBJ entries whose base
         // differs from begin, but only while the DELETE bit is clear.
9528     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9529     DeleteCond = MapperCGF.Builder.CreateIsNull(
9530         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9531   } else {
         // Deletion applies to arrays whose map type has the DELETE bit set.
9532     Cond = IsArray;
9533     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9534         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9535   }
9536   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
       // Skip straight to ExitBB when no initialization/deletion is needed.
9537   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9538 
9539   MapperCGF.EmitBlock(BodyBB);
9540   // Get the array size by multiplying element size and element number (i.e., \p
9541   // Size).
9542   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9543       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9544   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9545   // memory allocation/deletion purpose only.
9546   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9547       MapType,
9548       MapperCGF.Builder.getInt64(
9549           ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9550               OpenMPOffloadMappingFlags::OMP_MAP_TO |
9551               OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
       // Additionally set the OMP_MAP_IMPLICIT bit on the pushed component.
9552   MapTypeArg = MapperCGF.Builder.CreateOr(
9553       MapTypeArg,
9554       MapperCGF.Builder.getInt64(
9555           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9556               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9557 
9558   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9559   // data structure.
9560   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9561                                    ArraySize, MapTypeArg, MapName};
9562   MapperCGF.EmitRuntimeCall(
9563       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9564                                             OMPRTL___tgt_push_mapper_component),
9565       OffloadingArgs);
       // No terminator is emitted for BodyBB here; the builder is left positioned
       // after the runtime call and the caller emits the following block.
9566 }
9567 
9568 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9569     const OMPDeclareMapperDecl *D) {
9570   // Emit the mapper function on first request; every request is answered
9571   // from the UDMMap cache that the emission fills in.
9572   if (UDMMap.find(D) == UDMMap.end())
9573     emitUserDefinedMapper(D);
9574   return UDMMap.lookup(D);
9575 }
9576 
9577 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9578     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9579     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9580                                      const OMPLoopDirective &D)>
9581         SizeEmitter) {
9582   // Use D itself for combined teams-distribute kinds and for 'target teams
9583   // loop'; otherwise search for a nested distribute directive.
9584   const OpenMPDirectiveKind DKind = D.getDirectiveKind();
9585   const bool HasOwnLoop =
9586       (isOpenMPDistributeDirective(DKind) && isOpenMPTeamsDirective(DKind)) ||
9587       DKind == OMPD_target_teams_loop;
9588   const OMPExecutableDirective *LoopDir =
9589       HasOwnLoop ? &D : getNestedDistributeDirective(CGM.getContext(), D);
9590 
9591   // Without a loop directive, or without a value from the emitter, yield 0.
9592   llvm::Value *TripCount =
9593       LoopDir ? SizeEmitter(CGF, *cast<OMPLoopDirective>(LoopDir)) : nullptr;
9594   return TripCount ? TripCount : llvm::ConstantInt::get(CGF.Int64Ty, 0);
9595 }
9596 
9597 static void emitTargetCallFallback(
9598     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9599     const OMPExecutableDirective &D,
9600     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9601     const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF) {
9602   if (OffloadingMandatory) {
9603     // No host call is emitted in this mode, only an 'unreachable'.
9604     CGF.Builder.CreateUnreachable();
9605     return;
9606   }
9607   if (RequiresOuterTask) {
9608     CapturedVars.clear();
9609     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9610   }
9611   OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9612                                        CapturedVars);
9613 }
9614 
9615 static llvm::Value *emitDeviceID(
9616     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9617     CodeGenFunction &CGF) {
9618   const Expr *DeviceExpr = Device.getPointer();
9619 
9620   // No device expression: use the OMP_DEVICEID_UNDEF sentinel.
9621   if (!DeviceExpr)
9622     return CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9623 
9624   // Otherwise evaluate the expression and cast the result to i64, treating
9625   // it as a signed value.
9626   assert((Device.getInt() == OMPC_DEVICE_unknown ||
9627           Device.getInt() == OMPC_DEVICE_device_num) &&
9628          "Expected device_num modifier.");
9629   llvm::Value *DevVal = CGF.EmitScalarExpr(DeviceExpr);
9630   return CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9631 }
9632 
9633 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9634                                       CodeGenFunction &CGF) {
       // File-local helper (internal linkage, like the other emitTarget* helpers
       // in this file). Returns the size requested by an OMPXDynCGroupMemClause
       // on \p D as an i32 value, or the constant 0 if \p D has no such clause.
9635   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9636 
9637   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
         // Cleanups created while evaluating the size expression are run when
         // this scope ends.
9638     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9639     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9640         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
         // The size is converted to i32 as an unsigned value.
9641     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9642                                              /*isSigned=*/false);
9643   }
9644   return DynCGroupMem;
9645 }
9646 
9647 static void emitTargetCallKernelLaunch(
9648     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9649     const OMPExecutableDirective &D,
9650     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9651     const CapturedStmt &CS, bool OffloadingMandatory,
9652     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9653     llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9654     llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9655     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9656                                      const OMPLoopDirective &D)>
9657         SizeEmitter,
9658     CodeGenFunction &CGF, CodeGenModule &CGM) {
       // Collects the map information for every capture of \p CS, emits the
       // offloading arrays, publishes them through \p InputInfo, \p MapTypesArray
       // and \p MapNamesArray, and finally emits the kernel launch (with a host
       // fallback callback) either inline or inside a target task.
9659   llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9660 
9661   // Fill up the arrays with all the captured variables.
9662   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9663 
9664   // Get mappable expression information.
9665   MappableExprsHandler MEHandler(D, CGF);
9666   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9667   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9668 
       // RI walks the fields of the captured record and CV the captured values;
       // both advance in lock-step with the capture iterator CI.
9669   auto RI = CS.getCapturedRecordDecl()->field_begin();
9670   auto *CV = CapturedVars.begin();
9671   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9672                                             CE = CS.capture_end();
9673        CI != CE; ++CI, ++RI, ++CV) {
9674     MappableExprsHandler::MapCombinedInfoTy CurInfo;
9675     MappableExprsHandler::StructRangeInfoTy PartialStruct;
9676 
9677     // VLA sizes are passed to the outlined region by copy and do not have map
9678     // information associated.
9679     if (CI->capturesVariableArrayType()) {
9680       CurInfo.Exprs.push_back(nullptr);
9681       CurInfo.BasePointers.push_back(*CV);
9682       CurInfo.DevicePtrDecls.push_back(nullptr);
9683       CurInfo.DevicePointers.push_back(
9684           MappableExprsHandler::DeviceInfoTy::None);
9685       CurInfo.Pointers.push_back(*CV);
9686       CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9687           CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9688       // Copy to the device as an argument. No need to retrieve it.
9689       CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9690                               OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9691                               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9692       CurInfo.Mappers.push_back(nullptr);
9693     } else {
9694       // If we have any information in the map clause, we use it, otherwise we
9695       // just do a default mapping.
9696       MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
           // Remember which variables were handled here; a capture of 'this' is
           // recorded as a null declaration.
9697       if (!CI->capturesThis())
9698         MappedVarSet.insert(CI->getCapturedVar());
9699       else
9700         MappedVarSet.insert(nullptr);
9701       if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9702         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9703       // Generate correct mapping for variables captured by reference in
9704       // lambdas.
9705       if (CI->capturesVariable())
9706         MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9707                                                 CurInfo, LambdaPointers);
9708     }
9709     // We expect to have at least an element of information for this capture.
9710     assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9711            "Non-existing map pointer for capture!");
9712     assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9713            CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9714            CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9715            CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9716            "Inconsistent map information sizes!");
9717 
9718     // If there is an entry in PartialStruct it means we have a struct with
9719     // individual members mapped. Emit an extra combined entry.
9720     if (PartialStruct.Base.isValid()) {
9721       CombinedInfo.append(PartialStruct.PreliminaryMapData);
9722       MEHandler.emitCombinedEntry(
9723           CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9724           nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
9725     }
9726 
9727     // We need to append the results of this capture to what we already have.
9728     CombinedInfo.append(CurInfo);
9729   }
9730   // Adjust MEMBER_OF flags for the lambdas captures.
9731   MEHandler.adjustMemberOfForLambdaCaptures(
9732       LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
9733       CombinedInfo.Types);
9734   // Map any list items in a map clause that were not captures because they
9735   // weren't referenced within the construct.
9736   MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
9737 
9738   CGOpenMPRuntime::TargetDataInfo Info;
9739   // Fill up the arrays and create the arguments.
9740   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9741   bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9742                    llvm::codegenoptions::NoDebugInfo;
9743   OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9744                                           EmitDebug,
9745                                           /*ForEndCall=*/false);
9746 
       // Publish the generated arrays through the by-reference parameters so that
       // both the caller and the ThenGen lambda below can use them.
9747   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9748   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9749                                         CGF.VoidPtrTy, CGM.getPointerAlign());
9750   InputInfo.PointersArray =
9751       Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9752   InputInfo.SizesArray =
9753       Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9754   InputInfo.MappersArray =
9755       Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9756   MapTypesArray = Info.RTArgs.MapTypesArray;
9757   MapNamesArray = Info.RTArgs.MapNamesArray;
9758 
       // Region body that emits the actual launch. Its CGF parameter shadows the
       // CGF of the enclosing function.
9759   auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9760                     RequiresOuterTask, &CS, OffloadingMandatory, Device,
9761                     OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9762                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9763     bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9764 
9765     if (IsReverseOffloading) {
9766       // Reverse offloading is not supported, so just execute on the host.
9767       // FIXME: This fallback solution is incorrect since it ignores the
9768       // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9769       // assert here and ensure SEMA emits an error.
9770       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9771                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9772       return;
9773     }
9774 
9775     bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9776     unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9777 
9778     llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
9779     llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
9780     llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
9781     llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();
9782 
         // Callback handed to emitKernelLaunch: it emits the host fallback at the
         // given insertion point and returns the insertion point that follows it.
9783     auto &&EmitTargetCallFallbackCB =
9784         [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9785          OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9786         -> llvm::OpenMPIRBuilder::InsertPointTy {
9787       CGF.Builder.restoreIP(IP);
9788       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9789                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9790       return CGF.Builder.saveIP();
9791     };
9792 
9793     llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9794     llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9795     llvm::Value *NumThreads =
9796         OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9797     llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9798     llvm::Value *NumIterations =
9799         OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9800     llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9801     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9802         CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9803 
9804     llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9805         BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9806         nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9807 
9808     llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9809         NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9810         DynCGGroupMem, HasNoWait);
9811 
         // Continue emitting code at the insertion point returned by the launch.
9812     CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9813         CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9814         DeviceID, RTLoc, AllocaIP));
9815   };
9816 
       // Wrap the launch in a target task when an outer task is required;
       // otherwise emit it inline.
9817   if (RequiresOuterTask)
9818     CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9819   else
9820     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9821 }
9822 
9823 static void
9824 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9825                    const OMPExecutableDirective &D,
9826                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9827                    bool RequiresOuterTask, const CapturedStmt &CS,
9828                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9829   // Region body for the non-offloading path: it only emits the fallback.
9830   auto &&HostGen =
9831       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9832        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9833         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9834                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9835       };
9836   // Emit the region inline unless an outer task is required; in that case
9837   // wrap it in a target task with default-constructed target-data info.
9838   if (!RequiresOuterTask) {
9839     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), HostGen);
9840     return;
9841   }
9842   CodeGenFunction::OMPTargetDataInfo NoTargetData;
9843   CGF.EmitOMPTargetTaskBasedDirective(D, HostGen, NoTargetData);
9844 }
9845 
9846 void CGOpenMPRuntime::emitTargetCall(
9847     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9848     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9849     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9850     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9851                                      const OMPLoopDirective &D)>
9852         SizeEmitter) {
9853   if (!CGF.HaveInsertPoint())
9854     return;
9855 
9856   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9857                                    CGM.getLangOpts().OpenMPOffloadMandatory;
9858 
9859   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9860 
9861   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9862                                  D.hasClausesOfKind<OMPNowaitClause>() ||
9863                                  D.hasClausesOfKind<OMPInReductionClause>();
9864   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9865   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9866   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9867                                             PrePostActionTy &) {
9868     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9869   };
9870   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9871 
9872   CodeGenFunction::OMPTargetDataInfo InputInfo;
9873   llvm::Value *MapTypesArray = nullptr;
9874   llvm::Value *MapNamesArray = nullptr;
9875 
9876   auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9877                           RequiresOuterTask, &CS, OffloadingMandatory, Device,
9878                           OutlinedFnID, &InputInfo, &MapTypesArray,
9879                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9880                                                        PrePostActionTy &) {
9881     emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9882                                RequiresOuterTask, CS, OffloadingMandatory,
9883                                Device, OutlinedFnID, InputInfo, MapTypesArray,
9884                                MapNamesArray, SizeEmitter, CGF, CGM);
9885   };
9886 
9887   auto &&TargetElseGen =
9888       [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9889        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9890         emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9891                            CS, OffloadingMandatory, CGF);
9892       };
9893 
9894   // If we have a target function ID it means that we need to support
9895   // offloading, otherwise, just execute on the host. We need to execute on host
9896   // regardless of the conditional in the if clause if, e.g., the user do not
9897   // specify target triples.
9898   if (OutlinedFnID) {
9899     if (IfCond) {
9900       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9901     } else {
9902       RegionCodeGenTy ThenRCG(TargetThenGen);
9903       ThenRCG(CGF);
9904     }
9905   } else {
9906     RegionCodeGenTy ElseRCG(TargetElseGen);
9907     ElseRCG(CGF);
9908   }
9909 }
9910 
9911 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9912                                                     StringRef ParentName) {
9913   if (!S)
9914     return;
9915 
9916   // Codegen OMP target directives that offload compute to the device.
9917   bool RequiresDeviceCodegen =
9918       isa<OMPExecutableDirective>(S) &&
9919       isOpenMPTargetExecutionDirective(
9920           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9921 
9922   if (RequiresDeviceCodegen) {
9923     const auto &E = *cast<OMPExecutableDirective>(S);
9924 
9925     llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9926         CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9927 
9928     // Is this a target region that should not be emitted as an entry point? If
9929     // so just signal we are done with this target region.
9930     if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9931       return;
9932 
9933     switch (E.getDirectiveKind()) {
9934     case OMPD_target:
9935       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9936                                                    cast<OMPTargetDirective>(E));
9937       break;
9938     case OMPD_target_parallel:
9939       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9940           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9941       break;
9942     case OMPD_target_teams:
9943       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9944           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9945       break;
9946     case OMPD_target_teams_distribute:
9947       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9948           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9949       break;
9950     case OMPD_target_teams_distribute_simd:
9951       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9952           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9953       break;
9954     case OMPD_target_parallel_for:
9955       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9956           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9957       break;
9958     case OMPD_target_parallel_for_simd:
9959       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9960           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9961       break;
9962     case OMPD_target_simd:
9963       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9964           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9965       break;
9966     case OMPD_target_teams_distribute_parallel_for:
9967       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9968           CGM, ParentName,
9969           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9970       break;
9971     case OMPD_target_teams_distribute_parallel_for_simd:
9972       CodeGenFunction::
9973           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9974               CGM, ParentName,
9975               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9976       break;
9977     case OMPD_target_teams_loop:
9978       CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9979           CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9980       break;
9981     case OMPD_target_parallel_loop:
9982       CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9983           CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9984       break;
9985     case OMPD_parallel:
9986     case OMPD_for:
9987     case OMPD_parallel_for:
9988     case OMPD_parallel_master:
9989     case OMPD_parallel_sections:
9990     case OMPD_for_simd:
9991     case OMPD_parallel_for_simd:
9992     case OMPD_cancel:
9993     case OMPD_cancellation_point:
9994     case OMPD_ordered:
9995     case OMPD_threadprivate:
9996     case OMPD_allocate:
9997     case OMPD_task:
9998     case OMPD_simd:
9999     case OMPD_tile:
10000     case OMPD_unroll:
10001     case OMPD_sections:
10002     case OMPD_section:
10003     case OMPD_single:
10004     case OMPD_master:
10005     case OMPD_critical:
10006     case OMPD_taskyield:
10007     case OMPD_barrier:
10008     case OMPD_taskwait:
10009     case OMPD_taskgroup:
10010     case OMPD_atomic:
10011     case OMPD_flush:
10012     case OMPD_depobj:
10013     case OMPD_scan:
10014     case OMPD_teams:
10015     case OMPD_target_data:
10016     case OMPD_target_exit_data:
10017     case OMPD_target_enter_data:
10018     case OMPD_distribute:
10019     case OMPD_distribute_simd:
10020     case OMPD_distribute_parallel_for:
10021     case OMPD_distribute_parallel_for_simd:
10022     case OMPD_teams_distribute:
10023     case OMPD_teams_distribute_simd:
10024     case OMPD_teams_distribute_parallel_for:
10025     case OMPD_teams_distribute_parallel_for_simd:
10026     case OMPD_target_update:
10027     case OMPD_declare_simd:
10028     case OMPD_declare_variant:
10029     case OMPD_begin_declare_variant:
10030     case OMPD_end_declare_variant:
10031     case OMPD_declare_target:
10032     case OMPD_end_declare_target:
10033     case OMPD_declare_reduction:
10034     case OMPD_declare_mapper:
10035     case OMPD_taskloop:
10036     case OMPD_taskloop_simd:
10037     case OMPD_master_taskloop:
10038     case OMPD_master_taskloop_simd:
10039     case OMPD_parallel_master_taskloop:
10040     case OMPD_parallel_master_taskloop_simd:
10041     case OMPD_requires:
10042     case OMPD_metadirective:
10043     case OMPD_unknown:
10044     default:
10045       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10046     }
10047     return;
10048   }
10049 
10050   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10051     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10052       return;
10053 
10054     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10055     return;
10056   }
10057 
10058   // If this is a lambda function, look into its body.
10059   if (const auto *L = dyn_cast<LambdaExpr>(S))
10060     S = L->getBody();
10061 
10062   // Keep looking for target regions recursively.
10063   for (const Stmt *II : S->children())
10064     scanForTargetRegionsFunctions(II, ParentName);
10065 }
10066 
10067 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10068   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10069       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10070   if (!DevTy)
10071     return false;
10072   // Do not emit device_type(nohost) functions for the host.
10073   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10074     return true;
10075   // Do not emit device_type(host) functions for the device.
10076   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10077     return true;
10078   return false;
10079 }
10080 
10081 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10082   // If emitting code for the host, we do not process FD here. Instead we do
10083   // the normal code generation.
10084   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10085     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10086       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10087                                   CGM.getLangOpts().OpenMPIsTargetDevice))
10088         return true;
10089     return false;
10090   }
10091 
10092   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10093   // Try to detect target regions in the function.
10094   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10095     StringRef Name = CGM.getMangledName(GD);
10096     scanForTargetRegionsFunctions(FD->getBody(), Name);
10097     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10098                                 CGM.getLangOpts().OpenMPIsTargetDevice))
10099       return true;
10100   }
10101 
10102   // Do not to emit function if it is not marked as declare target.
10103   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10104          AlreadyEmittedTargetDecls.count(VD) == 0;
10105 }
10106 
10107 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10108   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10109                               CGM.getLangOpts().OpenMPIsTargetDevice))
10110     return true;
10111 
10112   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10113     return false;
10114 
10115   // Check if there are Ctors/Dtors in this declaration and look for target
10116   // regions in it. We use the complete variant to produce the kernel name
10117   // mangling.
10118   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10119   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10120     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10121       StringRef ParentName =
10122           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10123       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10124     }
10125     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10126       StringRef ParentName =
10127           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10128       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10129     }
10130   }
10131 
10132   // Do not to emit variable if it is not marked as declare target.
10133   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10134       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10135           cast<VarDecl>(GD.getDecl()));
10136   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10137       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10138         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10139        HasRequiresUnifiedSharedMemory)) {
10140     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10141     return true;
10142   }
10143   return false;
10144 }
10145 
10146 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10147                                                    llvm::Constant *Addr) {
10148   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10149       !CGM.getLangOpts().OpenMPIsTargetDevice)
10150     return;
10151 
10152   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10153       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10154 
10155   // If this is an 'extern' declaration we defer to the canonical definition and
10156   // do not emit an offloading entry.
10157   if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10158       VD->hasExternalStorage())
10159     return;
10160 
10161   if (!Res) {
10162     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10163       // Register non-target variables being emitted in device code (debug info
10164       // may cause this).
10165       StringRef VarName = CGM.getMangledName(VD);
10166       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10167     }
10168     return;
10169   }
10170 
10171   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10172   auto LinkageForVariable = [&VD, this]() {
10173     return CGM.getLLVMLinkageVarDefinition(VD);
10174   };
10175 
10176   std::vector<llvm::GlobalVariable *> GeneratedRefs;
10177   OMPBuilder.registerTargetGlobalVariable(
10178       convertCaptureClause(VD), convertDeviceClause(VD),
10179       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10180       VD->isExternallyVisible(),
10181       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10182                                   VD->getCanonicalDecl()->getBeginLoc()),
10183       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10184       CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10185       CGM.getTypes().ConvertTypeForMem(
10186           CGM.getContext().getPointerType(VD->getType())),
10187       Addr);
10188 
10189   for (auto *ref : GeneratedRefs)
10190     CGM.addCompilerUsedGlobal(ref);
10191 
10192   return;
10193 }
10194 
10195 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10196   if (isa<FunctionDecl>(GD.getDecl()) ||
10197       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10198     return emitTargetFunctions(GD);
10199 
10200   return emitTargetGlobalVariable(GD);
10201 }
10202 
10203 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10204   for (const VarDecl *VD : DeferredGlobalVariables) {
10205     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10206         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10207     if (!Res)
10208       continue;
10209     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10210          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10211         !HasRequiresUnifiedSharedMemory) {
10212       CGM.EmitGlobal(VD);
10213     } else {
10214       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10215               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10216                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10217                HasRequiresUnifiedSharedMemory)) &&
10218              "Expected link clause or to clause with unified memory.");
10219       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10220     }
10221   }
10222 }
10223 
10224 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10225     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10226   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10227          " Expected target-based directive.");
10228 }
10229 
10230 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10231   for (const OMPClause *Clause : D->clauselists()) {
10232     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10233       HasRequiresUnifiedSharedMemory = true;
10234       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10235     } else if (const auto *AC =
10236                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10237       switch (AC->getAtomicDefaultMemOrderKind()) {
10238       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10239         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10240         break;
10241       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10242         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10243         break;
10244       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10245         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10246         break;
10247       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10248         break;
10249       }
10250     }
10251   }
10252 }
10253 
10254 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10255   return RequiresAtomicOrdering;
10256 }
10257 
10258 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10259                                                        LangAS &AS) {
10260   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10261     return false;
10262   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10263   switch(A->getAllocatorType()) {
10264   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10265   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10266   // Not supported, fallback to the default mem space.
10267   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10268   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10269   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10270   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10271   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10272   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10273   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10274     AS = LangAS::Default;
10275     return true;
10276   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10277     llvm_unreachable("Expected predefined allocator for the variables with the "
10278                      "static storage.");
10279   }
10280   return false;
10281 }
10282 
10283 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10284   return HasRequiresUnifiedSharedMemory;
10285 }
10286 
10287 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10288     CodeGenModule &CGM)
10289     : CGM(CGM) {
10290   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10291     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10292     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10293   }
10294 }
10295 
10296 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10297   if (CGM.getLangOpts().OpenMPIsTargetDevice)
10298     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10299 }
10300 
10301 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10302   if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10303     return true;
10304 
10305   const auto *D = cast<FunctionDecl>(GD.getDecl());
10306   // Do not to emit function if it is marked as declare target as it was already
10307   // emitted.
10308   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10309     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10310       if (auto *F = dyn_cast_or_null<llvm::Function>(
10311               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10312         return !F->isDeclaration();
10313       return false;
10314     }
10315     return true;
10316   }
10317 
10318   return !AlreadyEmittedTargetDecls.insert(D).second;
10319 }
10320 
10321 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10322   // If we don't have entries or if we are emitting code for the device, we
10323   // don't need to do anything.
10324   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10325       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
10326       (OMPBuilder.OffloadInfoManager.empty() &&
10327        !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10328     return nullptr;
10329 
10330   // Create and register the function that handles the requires directives.
10331   ASTContext &C = CGM.getContext();
10332 
10333   llvm::Function *RequiresRegFn;
10334   {
10335     CodeGenFunction CGF(CGM);
10336     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10337     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10338     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10339     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10340     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10341     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10342     // TODO: check for other requires clauses.
10343     // The requires directive takes effect only when a target region is
10344     // present in the compilation unit. Otherwise it is ignored and not
10345     // passed to the runtime. This avoids the runtime from throwing an error
10346     // for mismatching requires clauses across compilation units that don't
10347     // contain at least 1 target region.
10348     assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10349             !OMPBuilder.OffloadInfoManager.empty()) &&
10350            "Target or declare target region expected.");
10351     if (HasRequiresUnifiedSharedMemory)
10352       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10353     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10354                             CGM.getModule(), OMPRTL___tgt_register_requires),
10355                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10356     CGF.FinishFunction();
10357   }
10358   return RequiresRegFn;
10359 }
10360 
10361 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10362                                     const OMPExecutableDirective &D,
10363                                     SourceLocation Loc,
10364                                     llvm::Function *OutlinedFn,
10365                                     ArrayRef<llvm::Value *> CapturedVars) {
10366   if (!CGF.HaveInsertPoint())
10367     return;
10368 
10369   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10370   CodeGenFunction::RunCleanupsScope Scope(CGF);
10371 
10372   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10373   llvm::Value *Args[] = {
10374       RTLoc,
10375       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10376       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10377   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10378   RealArgs.append(std::begin(Args), std::end(Args));
10379   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10380 
10381   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10382       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10383   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10384 }
10385 
10386 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10387                                          const Expr *NumTeams,
10388                                          const Expr *ThreadLimit,
10389                                          SourceLocation Loc) {
10390   if (!CGF.HaveInsertPoint())
10391     return;
10392 
10393   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10394 
10395   llvm::Value *NumTeamsVal =
10396       NumTeams
10397           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10398                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10399           : CGF.Builder.getInt32(0);
10400 
10401   llvm::Value *ThreadLimitVal =
10402       ThreadLimit
10403           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10404                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10405           : CGF.Builder.getInt32(0);
10406 
10407   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10408   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10409                                      ThreadLimitVal};
10410   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10411                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10412                       PushNumTeamsArgs);
10413 }
10414 
10415 void CGOpenMPRuntime::emitTargetDataCalls(
10416     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10417     const Expr *Device, const RegionCodeGenTy &CodeGen,
10418     CGOpenMPRuntime::TargetDataInfo &Info) {
10419   if (!CGF.HaveInsertPoint())
10420     return;
10421 
10422   // Action used to replace the default codegen action and turn privatization
10423   // off.
10424   PrePostActionTy NoPrivAction;
10425 
10426   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10427   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10428                          CGF.AllocaInsertPt->getIterator());
10429   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10430                           CGF.Builder.GetInsertPoint());
10431   llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10432 
10433   llvm::Value *IfCondVal = nullptr;
10434   if (IfCond)
10435     IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10436 
10437   // Emit device ID if any.
10438   llvm::Value *DeviceID = nullptr;
10439   if (Device) {
10440     DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10441                                          CGF.Int64Ty, /*isSigned=*/true);
10442   } else {
10443     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10444   }
10445 
10446   // Fill up the arrays with all the mapped variables.
10447   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10448   auto GenMapInfoCB =
10449       [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10450     CGF.Builder.restoreIP(CodeGenIP);
10451     // Get map clause information.
10452     MappableExprsHandler MEHandler(D, CGF);
10453     MEHandler.generateAllInfo(CombinedInfo);
10454 
10455     auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10456       return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10457     };
10458     if (CGM.getCodeGenOpts().getDebugInfo() !=
10459         llvm::codegenoptions::NoDebugInfo) {
10460       CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10461       llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10462                       FillInfoMap);
10463     }
10464 
10465     return CombinedInfo;
10466   };
10467   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10468   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10469     CGF.Builder.restoreIP(CodeGenIP);
10470     switch (BodyGenType) {
10471     case BodyGenTy::Priv:
10472       if (!Info.CaptureDeviceAddrMap.empty())
10473         CodeGen(CGF);
10474       break;
10475     case BodyGenTy::DupNoPriv:
10476       if (!Info.CaptureDeviceAddrMap.empty()) {
10477         CodeGen.setAction(NoPrivAction);
10478         CodeGen(CGF);
10479       }
10480       break;
10481     case BodyGenTy::NoPriv:
10482       if (Info.CaptureDeviceAddrMap.empty()) {
10483         CodeGen.setAction(NoPrivAction);
10484         CodeGen(CGF);
10485       }
10486       break;
10487     }
10488     return InsertPointTy(CGF.Builder.GetInsertBlock(),
10489                          CGF.Builder.GetInsertPoint());
10490   };
10491 
10492   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10493     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10494       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10495     }
10496   };
10497 
10498   auto CustomMapperCB = [&](unsigned int I) {
10499     llvm::Value *MFunc = nullptr;
10500     if (CombinedInfo.Mappers[I]) {
10501       Info.HasMapper = true;
10502       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10503           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10504     }
10505     return MFunc;
10506   };
10507 
10508   // Source location for the ident struct
10509   llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10510 
10511   CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10512       OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10513       /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10514 }
10515 
10516 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10517     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10518     const Expr *Device) {
10519   if (!CGF.HaveInsertPoint())
10520     return;
10521 
10522   assert((isa<OMPTargetEnterDataDirective>(D) ||
10523           isa<OMPTargetExitDataDirective>(D) ||
10524           isa<OMPTargetUpdateDirective>(D)) &&
10525          "Expecting either target enter, exit data, or update directives.");
10526 
10527   CodeGenFunction::OMPTargetDataInfo InputInfo;
10528   llvm::Value *MapTypesArray = nullptr;
10529   llvm::Value *MapNamesArray = nullptr;
10530   // Generate the code for the opening of the data environment.
10531   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10532                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10533     // Emit device ID if any.
10534     llvm::Value *DeviceID = nullptr;
10535     if (Device) {
10536       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10537                                            CGF.Int64Ty, /*isSigned=*/true);
10538     } else {
10539       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10540     }
10541 
10542     // Emit the number of elements in the offloading arrays.
10543     llvm::Constant *PointerNum =
10544         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10545 
10546     // Source location for the ident struct
10547     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10548 
10549     llvm::Value *OffloadingArgs[] = {RTLoc,
10550                                      DeviceID,
10551                                      PointerNum,
10552                                      InputInfo.BasePointersArray.getPointer(),
10553                                      InputInfo.PointersArray.getPointer(),
10554                                      InputInfo.SizesArray.getPointer(),
10555                                      MapTypesArray,
10556                                      MapNamesArray,
10557                                      InputInfo.MappersArray.getPointer()};
10558 
10559     // Select the right runtime function call for each standalone
10560     // directive.
10561     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10562     RuntimeFunction RTLFn;
10563     switch (D.getDirectiveKind()) {
10564     case OMPD_target_enter_data:
10565       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10566                         : OMPRTL___tgt_target_data_begin_mapper;
10567       break;
10568     case OMPD_target_exit_data:
10569       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10570                         : OMPRTL___tgt_target_data_end_mapper;
10571       break;
10572     case OMPD_target_update:
10573       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10574                         : OMPRTL___tgt_target_data_update_mapper;
10575       break;
10576     case OMPD_parallel:
10577     case OMPD_for:
10578     case OMPD_parallel_for:
10579     case OMPD_parallel_master:
10580     case OMPD_parallel_sections:
10581     case OMPD_for_simd:
10582     case OMPD_parallel_for_simd:
10583     case OMPD_cancel:
10584     case OMPD_cancellation_point:
10585     case OMPD_ordered:
10586     case OMPD_threadprivate:
10587     case OMPD_allocate:
10588     case OMPD_task:
10589     case OMPD_simd:
10590     case OMPD_tile:
10591     case OMPD_unroll:
10592     case OMPD_sections:
10593     case OMPD_section:
10594     case OMPD_single:
10595     case OMPD_master:
10596     case OMPD_critical:
10597     case OMPD_taskyield:
10598     case OMPD_barrier:
10599     case OMPD_taskwait:
10600     case OMPD_taskgroup:
10601     case OMPD_atomic:
10602     case OMPD_flush:
10603     case OMPD_depobj:
10604     case OMPD_scan:
10605     case OMPD_teams:
10606     case OMPD_target_data:
10607     case OMPD_distribute:
10608     case OMPD_distribute_simd:
10609     case OMPD_distribute_parallel_for:
10610     case OMPD_distribute_parallel_for_simd:
10611     case OMPD_teams_distribute:
10612     case OMPD_teams_distribute_simd:
10613     case OMPD_teams_distribute_parallel_for:
10614     case OMPD_teams_distribute_parallel_for_simd:
10615     case OMPD_declare_simd:
10616     case OMPD_declare_variant:
10617     case OMPD_begin_declare_variant:
10618     case OMPD_end_declare_variant:
10619     case OMPD_declare_target:
10620     case OMPD_end_declare_target:
10621     case OMPD_declare_reduction:
10622     case OMPD_declare_mapper:
10623     case OMPD_taskloop:
10624     case OMPD_taskloop_simd:
10625     case OMPD_master_taskloop:
10626     case OMPD_master_taskloop_simd:
10627     case OMPD_parallel_master_taskloop:
10628     case OMPD_parallel_master_taskloop_simd:
10629     case OMPD_target:
10630     case OMPD_target_simd:
10631     case OMPD_target_teams_distribute:
10632     case OMPD_target_teams_distribute_simd:
10633     case OMPD_target_teams_distribute_parallel_for:
10634     case OMPD_target_teams_distribute_parallel_for_simd:
10635     case OMPD_target_teams:
10636     case OMPD_target_parallel:
10637     case OMPD_target_parallel_for:
10638     case OMPD_target_parallel_for_simd:
10639     case OMPD_requires:
10640     case OMPD_metadirective:
10641     case OMPD_unknown:
10642     default:
10643       llvm_unreachable("Unexpected standalone target data directive.");
10644       break;
10645     }
10646     CGF.EmitRuntimeCall(
10647         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10648         OffloadingArgs);
10649   };
10650 
10651   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10652                           &MapNamesArray](CodeGenFunction &CGF,
10653                                           PrePostActionTy &) {
10654     // Fill up the arrays with all the mapped variables.
10655     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10656 
10657     // Get map clause information.
10658     MappableExprsHandler MEHandler(D, CGF);
10659     MEHandler.generateAllInfo(CombinedInfo);
10660 
10661     CGOpenMPRuntime::TargetDataInfo Info;
10662     // Fill up the arrays and create the arguments.
10663     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10664                          /*IsNonContiguous=*/true);
10665     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10666                              D.hasClausesOfKind<OMPNowaitClause>();
10667     bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10668                      llvm::codegenoptions::NoDebugInfo;
10669     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10670                                             EmitDebug,
10671                                             /*ForEndCall=*/false);
10672     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10673     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10674                                           CGF.VoidPtrTy, CGM.getPointerAlign());
10675     InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10676                                       CGM.getPointerAlign());
10677     InputInfo.SizesArray =
10678         Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10679     InputInfo.MappersArray =
10680         Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10681     MapTypesArray = Info.RTArgs.MapTypesArray;
10682     MapNamesArray = Info.RTArgs.MapNamesArray;
10683     if (RequiresOuterTask)
10684       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10685     else
10686       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10687   };
10688 
10689   if (IfCond) {
10690     emitIfClause(CGF, IfCond, TargetThenGen,
10691                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10692   } else {
10693     RegionCodeGenTy ThenRCG(TargetThenGen);
10694     ThenRCG(CGF);
10695   }
10696 }
10697 
10698 namespace {
10699   /// Kind of parameter in a function with 'declare simd' directive.
10700 enum ParamKindTy {
10701   Linear,
10702   LinearRef,
10703   LinearUVal,
10704   LinearVal,
10705   Uniform,
10706   Vector,
10707 };
10708 /// Attribute set of the parameter.
10709 struct ParamAttrTy {
10710   ParamKindTy Kind = Vector;
10711   llvm::APSInt StrideOrArg;
10712   llvm::APSInt Alignment;
10713   bool HasVarStride = false;
10714 };
10715 } // namespace
10716 
10717 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10718                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10719   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10720   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10721   // of that clause. The VLEN value must be power of 2.
10722   // In other case the notion of the function`s "characteristic data type" (CDT)
10723   // is used to compute the vector length.
10724   // CDT is defined in the following order:
10725   //   a) For non-void function, the CDT is the return type.
10726   //   b) If the function has any non-uniform, non-linear parameters, then the
10727   //   CDT is the type of the first such parameter.
10728   //   c) If the CDT determined by a) or b) above is struct, union, or class
10729   //   type which is pass-by-value (except for the type that maps to the
10730   //   built-in complex data type), the characteristic data type is int.
10731   //   d) If none of the above three cases is applicable, the CDT is int.
10732   // The VLEN is then determined based on the CDT and the size of vector
10733   // register of that ISA for which current vector version is generated. The
10734   // VLEN is computed using the formula below:
10735   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10736   // where vector register size specified in section 3.2.1 Registers and the
10737   // Stack Frame of original AMD64 ABI document.
10738   QualType RetType = FD->getReturnType();
10739   if (RetType.isNull())
10740     return 0;
10741   ASTContext &C = FD->getASTContext();
10742   QualType CDT;
10743   if (!RetType.isNull() && !RetType->isVoidType()) {
10744     CDT = RetType;
10745   } else {
10746     unsigned Offset = 0;
10747     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10748       if (ParamAttrs[Offset].Kind == Vector)
10749         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10750       ++Offset;
10751     }
10752     if (CDT.isNull()) {
10753       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10754         if (ParamAttrs[I + Offset].Kind == Vector) {
10755           CDT = FD->getParamDecl(I)->getType();
10756           break;
10757         }
10758       }
10759     }
10760   }
10761   if (CDT.isNull())
10762     CDT = C.IntTy;
10763   CDT = CDT->getCanonicalTypeUnqualified();
10764   if (CDT->isRecordType() || CDT->isUnionType())
10765     CDT = C.IntTy;
10766   return C.getTypeSize(CDT);
10767 }
10768 
10769 /// Mangle the parameter part of the vector function name according to
10770 /// their OpenMP classification. The mangling function is defined in
10771 /// section 4.5 of the AAVFABI(2021Q1).
10772 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10773   SmallString<256> Buffer;
10774   llvm::raw_svector_ostream Out(Buffer);
10775   for (const auto &ParamAttr : ParamAttrs) {
10776     switch (ParamAttr.Kind) {
10777     case Linear:
10778       Out << 'l';
10779       break;
10780     case LinearRef:
10781       Out << 'R';
10782       break;
10783     case LinearUVal:
10784       Out << 'U';
10785       break;
10786     case LinearVal:
10787       Out << 'L';
10788       break;
10789     case Uniform:
10790       Out << 'u';
10791       break;
10792     case Vector:
10793       Out << 'v';
10794       break;
10795     }
10796     if (ParamAttr.HasVarStride)
10797       Out << "s" << ParamAttr.StrideOrArg;
10798     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10799              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10800       // Don't print the step value if it is not present or if it is
10801       // equal to 1.
10802       if (ParamAttr.StrideOrArg < 0)
10803         Out << 'n' << -ParamAttr.StrideOrArg;
10804       else if (ParamAttr.StrideOrArg != 1)
10805         Out << ParamAttr.StrideOrArg;
10806     }
10807 
10808     if (!!ParamAttr.Alignment)
10809       Out << 'a' << ParamAttr.Alignment;
10810   }
10811 
10812   return std::string(Out.str());
10813 }
10814 
10815 static void
10816 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10817                            const llvm::APSInt &VLENVal,
10818                            ArrayRef<ParamAttrTy> ParamAttrs,
10819                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10820   struct ISADataTy {
10821     char ISA;
10822     unsigned VecRegSize;
10823   };
10824   ISADataTy ISAData[] = {
10825       {
10826           'b', 128
10827       }, // SSE
10828       {
10829           'c', 256
10830       }, // AVX
10831       {
10832           'd', 256
10833       }, // AVX2
10834       {
10835           'e', 512
10836       }, // AVX512
10837   };
10838   llvm::SmallVector<char, 2> Masked;
10839   switch (State) {
10840   case OMPDeclareSimdDeclAttr::BS_Undefined:
10841     Masked.push_back('N');
10842     Masked.push_back('M');
10843     break;
10844   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10845     Masked.push_back('N');
10846     break;
10847   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10848     Masked.push_back('M');
10849     break;
10850   }
10851   for (char Mask : Masked) {
10852     for (const ISADataTy &Data : ISAData) {
10853       SmallString<256> Buffer;
10854       llvm::raw_svector_ostream Out(Buffer);
10855       Out << "_ZGV" << Data.ISA << Mask;
10856       if (!VLENVal) {
10857         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10858         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10859         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10860       } else {
10861         Out << VLENVal;
10862       }
10863       Out << mangleVectorParameters(ParamAttrs);
10864       Out << '_' << Fn->getName();
10865       Fn->addFnAttr(Out.str());
10866     }
10867   }
10868 }
10869 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10875 
10876 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10877 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10878   QT = QT.getCanonicalType();
10879 
10880   if (QT->isVoidType())
10881     return false;
10882 
10883   if (Kind == ParamKindTy::Uniform)
10884     return false;
10885 
10886   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10887     return false;
10888 
10889   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10890       !QT->isReferenceType())
10891     return false;
10892 
10893   return true;
10894 }
10895 
10896 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10897 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10898   QT = QT.getCanonicalType();
10899   unsigned Size = C.getTypeSize(QT);
10900 
10901   // Only scalars and complex within 16 bytes wide set PVB to true.
10902   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10903     return false;
10904 
10905   if (QT->isFloatingType())
10906     return true;
10907 
10908   if (QT->isIntegerType())
10909     return true;
10910 
10911   if (QT->isPointerType())
10912     return true;
10913 
10914   // TODO: Add support for complex types (section 3.1.2, item 2).
10915 
10916   return false;
10917 }
10918 
10919 /// Computes the lane size (LS) of a return type or of an input parameter,
10920 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10921 /// TODO: Add support for references, section 3.2.1, item 1.
10922 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10923   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10924     QualType PTy = QT.getCanonicalType()->getPointeeType();
10925     if (getAArch64PBV(PTy, C))
10926       return C.getTypeSize(PTy);
10927   }
10928   if (getAArch64PBV(QT, C))
10929     return C.getTypeSize(QT);
10930 
10931   return C.getTypeSize(C.getUIntPtrType());
10932 }
10933 
10934 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10935 // signature of the scalar function, as defined in 3.2.2 of the
10936 // AAVFABI.
10937 static std::tuple<unsigned, unsigned, bool>
10938 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10939   QualType RetType = FD->getReturnType().getCanonicalType();
10940 
10941   ASTContext &C = FD->getASTContext();
10942 
10943   bool OutputBecomesInput = false;
10944 
10945   llvm::SmallVector<unsigned, 8> Sizes;
10946   if (!RetType->isVoidType()) {
10947     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10948     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10949       OutputBecomesInput = true;
10950   }
10951   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10952     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10953     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10954   }
10955 
10956   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10957   // The LS of a function parameter / return value can only be a power
10958   // of 2, starting from 8 bits, up to 128.
10959   assert(llvm::all_of(Sizes,
10960                       [](unsigned Size) {
10961                         return Size == 8 || Size == 16 || Size == 32 ||
10962                                Size == 64 || Size == 128;
10963                       }) &&
10964          "Invalid size");
10965 
10966   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10967                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10968                          OutputBecomesInput);
10969 }
10970 
10971 // Function used to add the attribute. The parameter `VLEN` is
10972 // templated to allow the use of "x" when targeting scalable functions
10973 // for SVE.
10974 template <typename T>
10975 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10976                                  char ISA, StringRef ParSeq,
10977                                  StringRef MangledName, bool OutputBecomesInput,
10978                                  llvm::Function *Fn) {
10979   SmallString<256> Buffer;
10980   llvm::raw_svector_ostream Out(Buffer);
10981   Out << Prefix << ISA << LMask << VLEN;
10982   if (OutputBecomesInput)
10983     Out << "v";
10984   Out << ParSeq << "_" << MangledName;
10985   Fn->addFnAttr(Out.str());
10986 }
10987 
10988 // Helper function to generate the Advanced SIMD names depending on
10989 // the value of the NDS when simdlen is not present.
10990 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10991                                       StringRef Prefix, char ISA,
10992                                       StringRef ParSeq, StringRef MangledName,
10993                                       bool OutputBecomesInput,
10994                                       llvm::Function *Fn) {
10995   switch (NDS) {
10996   case 8:
10997     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10998                          OutputBecomesInput, Fn);
10999     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11000                          OutputBecomesInput, Fn);
11001     break;
11002   case 16:
11003     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11004                          OutputBecomesInput, Fn);
11005     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11006                          OutputBecomesInput, Fn);
11007     break;
11008   case 32:
11009     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11010                          OutputBecomesInput, Fn);
11011     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11012                          OutputBecomesInput, Fn);
11013     break;
11014   case 64:
11015   case 128:
11016     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11017                          OutputBecomesInput, Fn);
11018     break;
11019   default:
11020     llvm_unreachable("Scalar type is too wide.");
11021   }
11022 }
11023 
11024 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11025 static void emitAArch64DeclareSimdFunction(
11026     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11027     ArrayRef<ParamAttrTy> ParamAttrs,
11028     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11029     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11030 
11031   // Get basic data for building the vector signature.
11032   const auto Data = getNDSWDS(FD, ParamAttrs);
11033   const unsigned NDS = std::get<0>(Data);
11034   const unsigned WDS = std::get<1>(Data);
11035   const bool OutputBecomesInput = std::get<2>(Data);
11036 
11037   // Check the values provided via `simdlen` by the user.
11038   // 1. A `simdlen(1)` doesn't produce vector signatures,
11039   if (UserVLEN == 1) {
11040     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11041         DiagnosticsEngine::Warning,
11042         "The clause simdlen(1) has no effect when targeting aarch64.");
11043     CGM.getDiags().Report(SLoc, DiagID);
11044     return;
11045   }
11046 
11047   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11048   // Advanced SIMD output.
11049   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11050     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11051         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11052                                     "power of 2 when targeting Advanced SIMD.");
11053     CGM.getDiags().Report(SLoc, DiagID);
11054     return;
11055   }
11056 
11057   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11058   // limits.
11059   if (ISA == 's' && UserVLEN != 0) {
11060     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11061       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11062           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11063                                       "lanes in the architectural constraints "
11064                                       "for SVE (min is 128-bit, max is "
11065                                       "2048-bit, by steps of 128-bit)");
11066       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11067       return;
11068     }
11069   }
11070 
11071   // Sort out parameter sequence.
11072   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11073   StringRef Prefix = "_ZGV";
11074   // Generate simdlen from user input (if any).
11075   if (UserVLEN) {
11076     if (ISA == 's') {
11077       // SVE generates only a masked function.
11078       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11079                            OutputBecomesInput, Fn);
11080     } else {
11081       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11082       // Advanced SIMD generates one or two functions, depending on
11083       // the `[not]inbranch` clause.
11084       switch (State) {
11085       case OMPDeclareSimdDeclAttr::BS_Undefined:
11086         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11087                              OutputBecomesInput, Fn);
11088         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11089                              OutputBecomesInput, Fn);
11090         break;
11091       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11092         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11093                              OutputBecomesInput, Fn);
11094         break;
11095       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11096         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11097                              OutputBecomesInput, Fn);
11098         break;
11099       }
11100     }
11101   } else {
11102     // If no user simdlen is provided, follow the AAVFABI rules for
11103     // generating the vector length.
11104     if (ISA == 's') {
11105       // SVE, section 3.4.1, item 1.
11106       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11107                            OutputBecomesInput, Fn);
11108     } else {
11109       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11110       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11111       // two vector names depending on the use of the clause
11112       // `[not]inbranch`.
11113       switch (State) {
11114       case OMPDeclareSimdDeclAttr::BS_Undefined:
11115         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11116                                   OutputBecomesInput, Fn);
11117         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11118                                   OutputBecomesInput, Fn);
11119         break;
11120       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11121         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11122                                   OutputBecomesInput, Fn);
11123         break;
11124       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11125         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11126                                   OutputBecomesInput, Fn);
11127         break;
11128       }
11129     }
11130   }
11131 }
11132 
11133 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11134                                               llvm::Function *Fn) {
11135   ASTContext &C = CGM.getContext();
11136   FD = FD->getMostRecentDecl();
11137   while (FD) {
11138     // Map params to their positions in function decl.
11139     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11140     if (isa<CXXMethodDecl>(FD))
11141       ParamPositions.try_emplace(FD, 0);
11142     unsigned ParamPos = ParamPositions.size();
11143     for (const ParmVarDecl *P : FD->parameters()) {
11144       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11145       ++ParamPos;
11146     }
11147     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11148       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11149       // Mark uniform parameters.
11150       for (const Expr *E : Attr->uniforms()) {
11151         E = E->IgnoreParenImpCasts();
11152         unsigned Pos;
11153         if (isa<CXXThisExpr>(E)) {
11154           Pos = ParamPositions[FD];
11155         } else {
11156           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11157                                 ->getCanonicalDecl();
11158           auto It = ParamPositions.find(PVD);
11159           assert(It != ParamPositions.end() && "Function parameter not found");
11160           Pos = It->second;
11161         }
11162         ParamAttrs[Pos].Kind = Uniform;
11163       }
11164       // Get alignment info.
11165       auto *NI = Attr->alignments_begin();
11166       for (const Expr *E : Attr->aligneds()) {
11167         E = E->IgnoreParenImpCasts();
11168         unsigned Pos;
11169         QualType ParmTy;
11170         if (isa<CXXThisExpr>(E)) {
11171           Pos = ParamPositions[FD];
11172           ParmTy = E->getType();
11173         } else {
11174           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11175                                 ->getCanonicalDecl();
11176           auto It = ParamPositions.find(PVD);
11177           assert(It != ParamPositions.end() && "Function parameter not found");
11178           Pos = It->second;
11179           ParmTy = PVD->getType();
11180         }
11181         ParamAttrs[Pos].Alignment =
11182             (*NI)
11183                 ? (*NI)->EvaluateKnownConstInt(C)
11184                 : llvm::APSInt::getUnsigned(
11185                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11186                           .getQuantity());
11187         ++NI;
11188       }
11189       // Mark linear parameters.
11190       auto *SI = Attr->steps_begin();
11191       auto *MI = Attr->modifiers_begin();
11192       for (const Expr *E : Attr->linears()) {
11193         E = E->IgnoreParenImpCasts();
11194         unsigned Pos;
11195         bool IsReferenceType = false;
11196         // Rescaling factor needed to compute the linear parameter
11197         // value in the mangled name.
11198         unsigned PtrRescalingFactor = 1;
11199         if (isa<CXXThisExpr>(E)) {
11200           Pos = ParamPositions[FD];
11201           auto *P = cast<PointerType>(E->getType());
11202           PtrRescalingFactor = CGM.getContext()
11203                                    .getTypeSizeInChars(P->getPointeeType())
11204                                    .getQuantity();
11205         } else {
11206           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11207                                 ->getCanonicalDecl();
11208           auto It = ParamPositions.find(PVD);
11209           assert(It != ParamPositions.end() && "Function parameter not found");
11210           Pos = It->second;
11211           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11212             PtrRescalingFactor = CGM.getContext()
11213                                      .getTypeSizeInChars(P->getPointeeType())
11214                                      .getQuantity();
11215           else if (PVD->getType()->isReferenceType()) {
11216             IsReferenceType = true;
11217             PtrRescalingFactor =
11218                 CGM.getContext()
11219                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11220                     .getQuantity();
11221           }
11222         }
11223         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11224         if (*MI == OMPC_LINEAR_ref)
11225           ParamAttr.Kind = LinearRef;
11226         else if (*MI == OMPC_LINEAR_uval)
11227           ParamAttr.Kind = LinearUVal;
11228         else if (IsReferenceType)
11229           ParamAttr.Kind = LinearVal;
11230         else
11231           ParamAttr.Kind = Linear;
11232         // Assuming a stride of 1, for `linear` without modifiers.
11233         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11234         if (*SI) {
11235           Expr::EvalResult Result;
11236           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11237             if (const auto *DRE =
11238                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11239               if (const auto *StridePVD =
11240                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11241                 ParamAttr.HasVarStride = true;
11242                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11243                 assert(It != ParamPositions.end() &&
11244                        "Function parameter not found");
11245                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11246               }
11247             }
11248           } else {
11249             ParamAttr.StrideOrArg = Result.Val.getInt();
11250           }
11251         }
11252         // If we are using a linear clause on a pointer, we need to
11253         // rescale the value of linear_step with the byte size of the
11254         // pointee type.
11255         if (!ParamAttr.HasVarStride &&
11256             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11257           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11258         ++SI;
11259         ++MI;
11260       }
11261       llvm::APSInt VLENVal;
11262       SourceLocation ExprLoc;
11263       const Expr *VLENExpr = Attr->getSimdlen();
11264       if (VLENExpr) {
11265         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11266         ExprLoc = VLENExpr->getExprLoc();
11267       }
11268       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11269       if (CGM.getTriple().isX86()) {
11270         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11271       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11272         unsigned VLEN = VLENVal.getExtValue();
11273         StringRef MangledName = Fn->getName();
11274         if (CGM.getTarget().hasFeature("sve"))
11275           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11276                                          MangledName, 's', 128, Fn, ExprLoc);
11277         else if (CGM.getTarget().hasFeature("neon"))
11278           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11279                                          MangledName, 'n', 128, Fn, ExprLoc);
11280       }
11281     }
11282     FD = FD->getPreviousDecl();
11283   }
11284 }
11285 
11286 namespace {
11287 /// Cleanup action for doacross support.
11288 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11289 public:
11290   static const int DoacrossFinArgs = 2;
11291 
11292 private:
11293   llvm::FunctionCallee RTLFn;
11294   llvm::Value *Args[DoacrossFinArgs];
11295 
11296 public:
11297   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11298                     ArrayRef<llvm::Value *> CallArgs)
11299       : RTLFn(RTLFn) {
11300     assert(CallArgs.size() == DoacrossFinArgs);
11301     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11302   }
11303   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11304     if (!CGF.HaveInsertPoint())
11305       return;
11306     CGF.EmitRuntimeCall(RTLFn, Args);
11307   }
11308 };
11309 } // namespace
11310 
11311 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11312                                        const OMPLoopDirective &D,
11313                                        ArrayRef<Expr *> NumIterations) {
11314   if (!CGF.HaveInsertPoint())
11315     return;
11316 
11317   ASTContext &C = CGM.getContext();
11318   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11319   RecordDecl *RD;
11320   if (KmpDimTy.isNull()) {
11321     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11322     //  kmp_int64 lo; // lower
11323     //  kmp_int64 up; // upper
11324     //  kmp_int64 st; // stride
11325     // };
11326     RD = C.buildImplicitRecord("kmp_dim");
11327     RD->startDefinition();
11328     addFieldToRecordDecl(C, RD, Int64Ty);
11329     addFieldToRecordDecl(C, RD, Int64Ty);
11330     addFieldToRecordDecl(C, RD, Int64Ty);
11331     RD->completeDefinition();
11332     KmpDimTy = C.getRecordType(RD);
11333   } else {
11334     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11335   }
11336   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11337   QualType ArrayTy =
11338       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11339 
11340   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11341   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11342   enum { LowerFD = 0, UpperFD, StrideFD };
11343   // Fill dims with data.
11344   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11345     LValue DimsLVal = CGF.MakeAddrLValue(
11346         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11347     // dims.upper = num_iterations;
11348     LValue UpperLVal = CGF.EmitLValueForField(
11349         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11350     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11351         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11352         Int64Ty, NumIterations[I]->getExprLoc());
11353     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11354     // dims.stride = 1;
11355     LValue StrideLVal = CGF.EmitLValueForField(
11356         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11357     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11358                           StrideLVal);
11359   }
11360 
11361   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11362   // kmp_int32 num_dims, struct kmp_dim * dims);
11363   llvm::Value *Args[] = {
11364       emitUpdateLocation(CGF, D.getBeginLoc()),
11365       getThreadID(CGF, D.getBeginLoc()),
11366       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11367       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11368           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11369           CGM.VoidPtrTy)};
11370 
11371   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11372       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11373   CGF.EmitRuntimeCall(RTLFn, Args);
11374   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11375       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11376   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11377       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11378   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11379                                              llvm::ArrayRef(FiniArgs));
11380 }
11381 
11382 template <typename T>
11383 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11384                                 const T *C, llvm::Value *ULoc,
11385                                 llvm::Value *ThreadID) {
11386   QualType Int64Ty =
11387       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11388   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11389   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11390       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11391   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11392   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11393     const Expr *CounterVal = C->getLoopData(I);
11394     assert(CounterVal);
11395     llvm::Value *CntVal = CGF.EmitScalarConversion(
11396         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11397         CounterVal->getExprLoc());
11398     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11399                           /*Volatile=*/false, Int64Ty);
11400   }
11401   llvm::Value *Args[] = {
11402       ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11403   llvm::FunctionCallee RTLFn;
11404   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11405   OMPDoacrossKind<T> ODK;
11406   if (ODK.isSource(C)) {
11407     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11408                                                   OMPRTL___kmpc_doacross_post);
11409   } else {
11410     assert(ODK.isSink(C) && "Expect sink modifier.");
11411     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11412                                                   OMPRTL___kmpc_doacross_wait);
11413   }
11414   CGF.EmitRuntimeCall(RTLFn, Args);
11415 }
11416 
11417 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11418                                           const OMPDependClause *C) {
11419   return EmitDoacrossOrdered<OMPDependClause>(
11420       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11421       getThreadID(CGF, C->getBeginLoc()));
11422 }
11423 
11424 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11425                                           const OMPDoacrossClause *C) {
11426   return EmitDoacrossOrdered<OMPDoacrossClause>(
11427       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11428       getThreadID(CGF, C->getBeginLoc()));
11429 }
11430 
11431 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11432                                llvm::FunctionCallee Callee,
11433                                ArrayRef<llvm::Value *> Args) const {
11434   assert(Loc.isValid() && "Outlined function call location must be valid.");
11435   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11436 
11437   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11438     if (Fn->doesNotThrow()) {
11439       CGF.EmitNounwindRuntimeCall(Fn, Args);
11440       return;
11441     }
11442   }
11443   CGF.EmitRuntimeCall(Callee, Args);
11444 }
11445 
/// Emit a call to the outlined function \p OutlinedFn with arguments \p Args.
/// This implementation simply forwards to emitCall with the same location,
/// callee and arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11451 
11452 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11453   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11454     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11455       HasEmittedDeclareTargetRegion = true;
11456 }
11457 
/// Return the address of the parameter inside the outlined function.
/// This implementation ignores \p TargetParam and returns the address of the
/// local variable for \p NativeParam.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11463 
11464 /// Return allocator value from expression, or return a null allocator (default
11465 /// when no allocator specified).
11466 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11467                                     const Expr *Allocator) {
11468   llvm::Value *AllocVal;
11469   if (Allocator) {
11470     AllocVal = CGF.EmitScalarExpr(Allocator);
11471     // According to the standard, the original allocator type is a enum
11472     // (integer). Convert to pointer type, if required.
11473     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11474                                         CGF.getContext().VoidPtrTy,
11475                                         Allocator->getExprLoc());
11476   } else {
11477     // If no allocator specified, it defaults to the null allocator.
11478     AllocVal = llvm::Constant::getNullValue(
11479         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11480   }
11481   return AllocVal;
11482 }
11483 
11484 /// Return the alignment from an allocate directive if present.
11485 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11486   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11487 
11488   if (!AllocateAlignment)
11489     return nullptr;
11490 
11491   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11492 }
11493 
/// Return the address to use for local variable \p VD, or an invalid address
/// when no special handling applies.
///
/// Two sources are consulted:
///  - the untied-task map registered for the current function, which may
///    supply a pair of addresses for \p VD;
///  - an OMPAllocateDeclAttr on the canonical declaration. If present and
///    isAllocatableDecl(VD) holds, storage is obtained through __kmpc_alloc
///    (or __kmpc_aligned_alloc when an alignment value is available) and a
///    normal-and-EH cleanup calling __kmpc_free is pushed.
///
/// \returns Address::invalid() for a null \p VD; the untied address (possibly
/// invalid) when no runtime allocation is emitted; otherwise the address of
/// the allocated storage, or the untied "real" address when one is recorded.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the addresses recorded for VD in the untied-task data that was
  // registered for the function currently being emitted, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant size: round it up to the alignment at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    // Argument list: (gtid, [alignment,] size, allocator). The alignment
    // operand is only passed to the aligned variant of the allocation call.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // If an untied-task slot exists for this variable, store the freshly
    // allocated pointer into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Emits a call of the stored runtime function with the arguments
    // (gtid, address cast to void *, allocator value).
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        // The allocator expression is evaluated again at the cleanup point.
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied "real" address when one is recorded; otherwise wrap
    // the allocated pointer with the variable's memory type and alignment.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    // For variables tracked by an untied task, emit the region's untied
    // switch after the allocation when inside an OpenMP region.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11593 
11594 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11595                                              const VarDecl *VD) const {
11596   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11597   if (It == FunctionToUntiedTaskStackMap.end())
11598     return false;
11599   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11600 }
11601 
11602 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11603     CodeGenModule &CGM, const OMPLoopDirective &S)
11604     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11605   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11606   if (!NeedToPush)
11607     return;
11608   NontemporalDeclsSet &DS =
11609       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11610   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11611     for (const Stmt *Ref : C->private_refs()) {
11612       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11613       const ValueDecl *VD;
11614       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11615         VD = DRE->getDecl();
11616       } else {
11617         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11618         assert((ME->isImplicitCXXThis() ||
11619                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11620                "Expected member of current class.");
11621         VD = ME->getMemberDecl();
11622       }
11623       DS.insert(VD);
11624     }
11625   }
11626 }
11627 
11628 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11629   if (!NeedToPush)
11630     return;
11631   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11632 }
11633 
11634 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11635     CodeGenFunction &CGF,
11636     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11637                           std::pair<Address, Address>> &LocalVars)
11638     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11639   if (!NeedToPush)
11640     return;
11641   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11642       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11643   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11644 }
11645 
11646 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11647   if (!NeedToPush)
11648     return;
11649   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11650 }
11651 
11652 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11653   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11654 
11655   return llvm::any_of(
11656       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11657       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11658 }
11659 
11660 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11661     const OMPExecutableDirective &S,
11662     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11663     const {
11664   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11665   // Vars in target/task regions must be excluded completely.
11666   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11667       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11668     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11669     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11670     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11671     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11672       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11673         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11674     }
11675   }
11676   // Exclude vars in private clauses.
11677   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11678     for (const Expr *Ref : C->varlists()) {
11679       if (!Ref->getType()->isScalarType())
11680         continue;
11681       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11682       if (!DRE)
11683         continue;
11684       NeedToCheckForLPCs.insert(DRE->getDecl());
11685     }
11686   }
11687   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11688     for (const Expr *Ref : C->varlists()) {
11689       if (!Ref->getType()->isScalarType())
11690         continue;
11691       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11692       if (!DRE)
11693         continue;
11694       NeedToCheckForLPCs.insert(DRE->getDecl());
11695     }
11696   }
11697   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11698     for (const Expr *Ref : C->varlists()) {
11699       if (!Ref->getType()->isScalarType())
11700         continue;
11701       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11702       if (!DRE)
11703         continue;
11704       NeedToCheckForLPCs.insert(DRE->getDecl());
11705     }
11706   }
11707   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11708     for (const Expr *Ref : C->varlists()) {
11709       if (!Ref->getType()->isScalarType())
11710         continue;
11711       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11712       if (!DRE)
11713         continue;
11714       NeedToCheckForLPCs.insert(DRE->getDecl());
11715     }
11716   }
11717   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11718     for (const Expr *Ref : C->varlists()) {
11719       if (!Ref->getType()->isScalarType())
11720         continue;
11721       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11722       if (!DRE)
11723         continue;
11724       NeedToCheckForLPCs.insert(DRE->getDecl());
11725     }
11726   }
11727   for (const Decl *VD : NeedToCheckForLPCs) {
11728     for (const LastprivateConditionalData &Data :
11729          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11730       if (Data.DeclToUniqueName.count(VD) > 0) {
11731         if (!Data.Disabled)
11732           NeedToAddForLPCsAsDisabled.insert(VD);
11733         break;
11734       }
11735     }
11736   }
11737 }
11738 
11739 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11740     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11741     : CGM(CGF.CGM),
11742       Action((CGM.getLangOpts().OpenMP >= 50 &&
11743               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11744                            [](const OMPLastprivateClause *C) {
11745                              return C->getKind() ==
11746                                     OMPC_LASTPRIVATE_conditional;
11747                            }))
11748                  ? ActionToDo::PushAsLastprivateConditional
11749                  : ActionToDo::DoNotPush) {
11750   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11751   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11752     return;
11753   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11754          "Expected a push action.");
11755   LastprivateConditionalData &Data =
11756       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11757   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11758     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11759       continue;
11760 
11761     for (const Expr *Ref : C->varlists()) {
11762       Data.DeclToUniqueName.insert(std::make_pair(
11763           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11764           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11765     }
11766   }
11767   Data.IVLVal = IVLVal;
11768   Data.Fn = CGF.CurFn;
11769 }
11770 
11771 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11772     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11773     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11774   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11775   if (CGM.getLangOpts().OpenMP < 50)
11776     return;
11777   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11778   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11779   if (!NeedToAddForLPCsAsDisabled.empty()) {
11780     Action = ActionToDo::DisableLastprivateConditional;
11781     LastprivateConditionalData &Data =
11782         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11783     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11784       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11785     Data.Fn = CGF.CurFn;
11786     Data.Disabled = true;
11787   }
11788 }
11789 
/// Create the RAII object in its "disable" form for directive \p S by
/// invoking the two-argument constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11795 
11796 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11797   if (CGM.getLangOpts().OpenMP < 50)
11798     return;
11799   if (Action == ActionToDo::DisableLastprivateConditional) {
11800     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11801            "Expected list of disabled private vars.");
11802     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11803   }
11804   if (Action == ActionToDo::PushAsLastprivateConditional) {
11805     assert(
11806         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11807         "Expected list of lastprivate conditional vars.");
11808     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11809   }
11810 }
11811 
11812 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11813                                                         const VarDecl *VD) {
11814   ASTContext &C = CGM.getContext();
11815   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11816   if (I == LastprivateConditionalToTypes.end())
11817     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11818   QualType NewType;
11819   const FieldDecl *VDField;
11820   const FieldDecl *FiredField;
11821   LValue BaseLVal;
11822   auto VI = I->getSecond().find(VD);
11823   if (VI == I->getSecond().end()) {
11824     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11825     RD->startDefinition();
11826     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11827     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11828     RD->completeDefinition();
11829     NewType = C.getRecordType(RD);
11830     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11831     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11832     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11833   } else {
11834     NewType = std::get<0>(VI->getSecond());
11835     VDField = std::get<1>(VI->getSecond());
11836     FiredField = std::get<2>(VI->getSecond());
11837     BaseLVal = std::get<3>(VI->getSecond());
11838   }
11839   LValue FiredLVal =
11840       CGF.EmitLValueForField(BaseLVal, FiredField);
11841   CGF.EmitStoreOfScalar(
11842       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11843       FiredLVal);
11844   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11845 }
11846 
11847 namespace {
11848 /// Checks if the lastprivate conditional variable is referenced in LHS.
11849 class LastprivateConditionalRefChecker final
11850     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11851   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11852   const Expr *FoundE = nullptr;
11853   const Decl *FoundD = nullptr;
11854   StringRef UniqueDeclName;
11855   LValue IVLVal;
11856   llvm::Function *FoundFn = nullptr;
11857   SourceLocation Loc;
11858 
11859 public:
11860   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11861     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11862          llvm::reverse(LPM)) {
11863       auto It = D.DeclToUniqueName.find(E->getDecl());
11864       if (It == D.DeclToUniqueName.end())
11865         continue;
11866       if (D.Disabled)
11867         return false;
11868       FoundE = E;
11869       FoundD = E->getDecl()->getCanonicalDecl();
11870       UniqueDeclName = It->second;
11871       IVLVal = D.IVLVal;
11872       FoundFn = D.Fn;
11873       break;
11874     }
11875     return FoundE == E;
11876   }
11877   bool VisitMemberExpr(const MemberExpr *E) {
11878     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11879       return false;
11880     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11881          llvm::reverse(LPM)) {
11882       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11883       if (It == D.DeclToUniqueName.end())
11884         continue;
11885       if (D.Disabled)
11886         return false;
11887       FoundE = E;
11888       FoundD = E->getMemberDecl()->getCanonicalDecl();
11889       UniqueDeclName = It->second;
11890       IVLVal = D.IVLVal;
11891       FoundFn = D.Fn;
11892       break;
11893     }
11894     return FoundE == E;
11895   }
11896   bool VisitStmt(const Stmt *S) {
11897     for (const Stmt *Child : S->children()) {
11898       if (!Child)
11899         continue;
11900       if (const auto *E = dyn_cast<Expr>(Child))
11901         if (!E->isGLValue())
11902           continue;
11903       if (Visit(Child))
11904         return true;
11905     }
11906     return false;
11907   }
11908   explicit LastprivateConditionalRefChecker(
11909       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11910       : LPM(LPM) {}
11911   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11912   getFoundData() const {
11913     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11914   }
11915 };
11916 } // namespace
11917 
/// Emit the update of the global copy of a lastprivate conditional variable.
///
/// Two internal variables are obtained from the OpenMP IR builder: one named
/// after \p UniqueDeclName holding the last value, and one with the "iv"
/// suffix holding the loop counter at which that value was written. The
/// emitted code stores the current counter (loaded from \p IVLVal) and the
/// private value (loaded from \p LVal) into them when the stored counter is
/// less than or equal to the current one.
///
/// Unless compiling in OpenMP SIMD-only mode, the compare-and-store sequence
/// is wrapped in a critical region named \p UniqueDeclName.
///
/// \param Loc Source location used for the emitted loads and for the critical
/// region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Loaded here, before the region body below is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // The lvalues are captured by reference; the lambda is only invoked within
  // this function, while they are still alive.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned depending on the counter's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be copied here.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded right
    // away; confirm this has the intended effect on the blocks emitted below.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12003 
12004 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12005                                                          const Expr *LHS) {
12006   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12007     return;
12008   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12009   if (!Checker.Visit(LHS))
12010     return;
12011   const Expr *FoundE;
12012   const Decl *FoundD;
12013   StringRef UniqueDeclName;
12014   LValue IVLVal;
12015   llvm::Function *FoundFn;
12016   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12017       Checker.getFoundData();
12018   if (FoundFn != CGF.CurFn) {
12019     // Special codegen for inner parallel regions.
12020     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12021     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12022     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12023            "Lastprivate conditional is not found in outer region.");
12024     QualType StructTy = std::get<0>(It->getSecond());
12025     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12026     LValue PrivLVal = CGF.EmitLValue(FoundE);
12027     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12028         PrivLVal.getAddress(CGF),
12029         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12030         CGF.ConvertTypeForMem(StructTy));
12031     LValue BaseLVal =
12032         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12033     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12034     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12035                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12036                         FiredLVal, llvm::AtomicOrdering::Unordered,
12037                         /*IsVolatile=*/true, /*isInit=*/false);
12038     return;
12039   }
12040 
12041   // Private address of the lastprivate conditional in the current context.
12042   // priv_a
12043   LValue LVal = CGF.EmitLValue(FoundE);
12044   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12045                                    FoundE->getExprLoc());
12046 }
12047 
12048 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12049     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12050     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12051   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12052     return;
12053   auto Range = llvm::reverse(LastprivateConditionalStack);
12054   auto It = llvm::find_if(
12055       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12056   if (It == Range.end() || It->Fn != CGF.CurFn)
12057     return;
12058   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12059   assert(LPCI != LastprivateConditionalToTypes.end() &&
12060          "Lastprivates must be registered already.");
12061   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12062   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12063   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12064   for (const auto &Pair : It->DeclToUniqueName) {
12065     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12066     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12067       continue;
12068     auto I = LPCI->getSecond().find(Pair.first);
12069     assert(I != LPCI->getSecond().end() &&
12070            "Lastprivate must be rehistered already.");
12071     // bool Cmp = priv_a.Fired != 0;
12072     LValue BaseLVal = std::get<3>(I->getSecond());
12073     LValue FiredLVal =
12074         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12075     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12076     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12077     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12078     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12079     // if (Cmp) {
12080     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12081     CGF.EmitBlock(ThenBB);
12082     Address Addr = CGF.GetAddrOfLocalVar(VD);
12083     LValue LVal;
12084     if (VD->getType()->isReferenceType())
12085       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12086                                            AlignmentSource::Decl);
12087     else
12088       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12089                                 AlignmentSource::Decl);
12090     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12091                                      D.getBeginLoc());
12092     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12093     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12094     // }
12095   }
12096 }
12097 
12098 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12099     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12100     SourceLocation Loc) {
12101   if (CGF.getLangOpts().OpenMP < 50)
12102     return;
12103   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12104   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12105          "Unknown lastprivate conditional variable.");
12106   StringRef UniqueName = It->second;
12107   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12108   // The variable was not updated in the region - exit.
12109   if (!GV)
12110     return;
12111   LValue LPLVal = CGF.MakeAddrLValue(
12112       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12113       PrivLVal.getType().getNonReferenceType());
12114   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12115   CGF.EmitStoreOfScalar(Res, PrivLVal);
12116 }
12117 
12118 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12119     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12120     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12121     const RegionCodeGenTy &CodeGen) {
12122   llvm_unreachable("Not supported in SIMD-only mode");
12123 }
12124 
12125 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12126     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12127     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12128     const RegionCodeGenTy &CodeGen) {
12129   llvm_unreachable("Not supported in SIMD-only mode");
12130 }
12131 
12132 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12133     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12134     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12135     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12136     bool Tied, unsigned &NumberOfParts) {
12137   llvm_unreachable("Not supported in SIMD-only mode");
12138 }
12139 
12140 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12141                                            SourceLocation Loc,
12142                                            llvm::Function *OutlinedFn,
12143                                            ArrayRef<llvm::Value *> CapturedVars,
12144                                            const Expr *IfCond,
12145                                            llvm::Value *NumThreads) {
12146   llvm_unreachable("Not supported in SIMD-only mode");
12147 }
12148 
12149 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12150     CodeGenFunction &CGF, StringRef CriticalName,
12151     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12152     const Expr *Hint) {
12153   llvm_unreachable("Not supported in SIMD-only mode");
12154 }
12155 
12156 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12157                                            const RegionCodeGenTy &MasterOpGen,
12158                                            SourceLocation Loc) {
12159   llvm_unreachable("Not supported in SIMD-only mode");
12160 }
12161 
12162 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12163                                            const RegionCodeGenTy &MasterOpGen,
12164                                            SourceLocation Loc,
12165                                            const Expr *Filter) {
12166   llvm_unreachable("Not supported in SIMD-only mode");
12167 }
12168 
12169 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12170                                             SourceLocation Loc) {
12171   llvm_unreachable("Not supported in SIMD-only mode");
12172 }
12173 
12174 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12175     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12176     SourceLocation Loc) {
12177   llvm_unreachable("Not supported in SIMD-only mode");
12178 }
12179 
12180 void CGOpenMPSIMDRuntime::emitSingleRegion(
12181     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12182     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12183     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12184     ArrayRef<const Expr *> AssignmentOps) {
12185   llvm_unreachable("Not supported in SIMD-only mode");
12186 }
12187 
12188 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12189                                             const RegionCodeGenTy &OrderedOpGen,
12190                                             SourceLocation Loc,
12191                                             bool IsThreads) {
12192   llvm_unreachable("Not supported in SIMD-only mode");
12193 }
12194 
12195 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12196                                           SourceLocation Loc,
12197                                           OpenMPDirectiveKind Kind,
12198                                           bool EmitChecks,
12199                                           bool ForceSimpleCall) {
12200   llvm_unreachable("Not supported in SIMD-only mode");
12201 }
12202 
12203 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12204     CodeGenFunction &CGF, SourceLocation Loc,
12205     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12206     bool Ordered, const DispatchRTInput &DispatchValues) {
12207   llvm_unreachable("Not supported in SIMD-only mode");
12208 }
12209 
12210 void CGOpenMPSIMDRuntime::emitForStaticInit(
12211     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12212     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12213   llvm_unreachable("Not supported in SIMD-only mode");
12214 }
12215 
12216 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12217     CodeGenFunction &CGF, SourceLocation Loc,
12218     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12219   llvm_unreachable("Not supported in SIMD-only mode");
12220 }
12221 
12222 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12223                                                      SourceLocation Loc,
12224                                                      unsigned IVSize,
12225                                                      bool IVSigned) {
12226   llvm_unreachable("Not supported in SIMD-only mode");
12227 }
12228 
12229 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12230                                               SourceLocation Loc,
12231                                               OpenMPDirectiveKind DKind) {
12232   llvm_unreachable("Not supported in SIMD-only mode");
12233 }
12234 
12235 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12236                                               SourceLocation Loc,
12237                                               unsigned IVSize, bool IVSigned,
12238                                               Address IL, Address LB,
12239                                               Address UB, Address ST) {
12240   llvm_unreachable("Not supported in SIMD-only mode");
12241 }
12242 
12243 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12244                                                llvm::Value *NumThreads,
12245                                                SourceLocation Loc) {
12246   llvm_unreachable("Not supported in SIMD-only mode");
12247 }
12248 
12249 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12250                                              ProcBindKind ProcBind,
12251                                              SourceLocation Loc) {
12252   llvm_unreachable("Not supported in SIMD-only mode");
12253 }
12254 
12255 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12256                                                     const VarDecl *VD,
12257                                                     Address VDAddr,
12258                                                     SourceLocation Loc) {
12259   llvm_unreachable("Not supported in SIMD-only mode");
12260 }
12261 
12262 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12263     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12264     CodeGenFunction *CGF) {
12265   llvm_unreachable("Not supported in SIMD-only mode");
12266 }
12267 
12268 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12269     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12270   llvm_unreachable("Not supported in SIMD-only mode");
12271 }
12272 
12273 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12274                                     ArrayRef<const Expr *> Vars,
12275                                     SourceLocation Loc,
12276                                     llvm::AtomicOrdering AO) {
12277   llvm_unreachable("Not supported in SIMD-only mode");
12278 }
12279 
12280 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12281                                        const OMPExecutableDirective &D,
12282                                        llvm::Function *TaskFunction,
12283                                        QualType SharedsTy, Address Shareds,
12284                                        const Expr *IfCond,
12285                                        const OMPTaskDataTy &Data) {
12286   llvm_unreachable("Not supported in SIMD-only mode");
12287 }
12288 
12289 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12290     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12291     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12292     const Expr *IfCond, const OMPTaskDataTy &Data) {
12293   llvm_unreachable("Not supported in SIMD-only mode");
12294 }
12295 
12296 void CGOpenMPSIMDRuntime::emitReduction(
12297     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12298     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12299     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12300   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12301   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12302                                  ReductionOps, Options);
12303 }
12304 
12305 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12306     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12307     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12308   llvm_unreachable("Not supported in SIMD-only mode");
12309 }
12310 
12311 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12312                                                 SourceLocation Loc,
12313                                                 bool IsWorksharingReduction) {
12314   llvm_unreachable("Not supported in SIMD-only mode");
12315 }
12316 
12317 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12318                                                   SourceLocation Loc,
12319                                                   ReductionCodeGen &RCG,
12320                                                   unsigned N) {
12321   llvm_unreachable("Not supported in SIMD-only mode");
12322 }
12323 
12324 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12325                                                   SourceLocation Loc,
12326                                                   llvm::Value *ReductionsPtr,
12327                                                   LValue SharedLVal) {
12328   llvm_unreachable("Not supported in SIMD-only mode");
12329 }
12330 
12331 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12332                                            SourceLocation Loc,
12333                                            const OMPTaskDataTy &Data) {
12334   llvm_unreachable("Not supported in SIMD-only mode");
12335 }
12336 
12337 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12338     CodeGenFunction &CGF, SourceLocation Loc,
12339     OpenMPDirectiveKind CancelRegion) {
12340   llvm_unreachable("Not supported in SIMD-only mode");
12341 }
12342 
12343 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12344                                          SourceLocation Loc, const Expr *IfCond,
12345                                          OpenMPDirectiveKind CancelRegion) {
12346   llvm_unreachable("Not supported in SIMD-only mode");
12347 }
12348 
12349 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12350     const OMPExecutableDirective &D, StringRef ParentName,
12351     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12352     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12353   llvm_unreachable("Not supported in SIMD-only mode");
12354 }
12355 
12356 void CGOpenMPSIMDRuntime::emitTargetCall(
12357     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12358     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12359     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12360     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12361                                      const OMPLoopDirective &D)>
12362         SizeEmitter) {
12363   llvm_unreachable("Not supported in SIMD-only mode");
12364 }
12365 
12366 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12367   llvm_unreachable("Not supported in SIMD-only mode");
12368 }
12369 
12370 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12371   llvm_unreachable("Not supported in SIMD-only mode");
12372 }
12373 
12374 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12375   return false;
12376 }
12377 
12378 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12379                                         const OMPExecutableDirective &D,
12380                                         SourceLocation Loc,
12381                                         llvm::Function *OutlinedFn,
12382                                         ArrayRef<llvm::Value *> CapturedVars) {
12383   llvm_unreachable("Not supported in SIMD-only mode");
12384 }
12385 
12386 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12387                                              const Expr *NumTeams,
12388                                              const Expr *ThreadLimit,
12389                                              SourceLocation Loc) {
12390   llvm_unreachable("Not supported in SIMD-only mode");
12391 }
12392 
12393 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12394     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12395     const Expr *Device, const RegionCodeGenTy &CodeGen,
12396     CGOpenMPRuntime::TargetDataInfo &Info) {
12397   llvm_unreachable("Not supported in SIMD-only mode");
12398 }
12399 
12400 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12401     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12402     const Expr *Device) {
12403   llvm_unreachable("Not supported in SIMD-only mode");
12404 }
12405 
12406 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12407                                            const OMPLoopDirective &D,
12408                                            ArrayRef<Expr *> NumIterations) {
12409   llvm_unreachable("Not supported in SIMD-only mode");
12410 }
12411 
12412 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12413                                               const OMPDependClause *C) {
12414   llvm_unreachable("Not supported in SIMD-only mode");
12415 }
12416 
12417 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12418                                               const OMPDoacrossClause *C) {
12419   llvm_unreachable("Not supported in SIMD-only mode");
12420 }
12421 
12422 const VarDecl *
12423 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12424                                         const VarDecl *NativeParam) const {
12425   llvm_unreachable("Not supported in SIMD-only mode");
12426 }
12427 
12428 Address
12429 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12430                                          const VarDecl *NativeParam,
12431                                          const VarDecl *TargetParam) const {
12432   llvm_unreachable("Not supported in SIMD-only mode");
12433 }
12434