//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
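
  // A rough sketch (illustrative, not emitted verbatim) of the dispatch the
  // action above builds for an untied task with two switching points:
  //
  //   switch (*part_id) {           // emitted in Enter()
  //   case 0:  goto .untied.jmp.0;
  //   case 1:  goto .untied.jmp.1;  // case added by emitUntiedSwitch()
  //   default: goto .untied.done.;  // branches through cleanups to the exit
  //   }
  //
  // Each emitUntiedSwitch() call stores the next case number into *part_id,
  // re-enqueues the task via the UntiedCodeGen callback, and returns, so a
  // rescheduled task resumes at the recorded switching point.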
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
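
// Typical usage of the RAII above, as a sketch (directive kind and cancel
// flag vary by caller):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined-region info installed
//   } // original CapturedStmtInfo (and lambda/block state) restored here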

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
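
// As an illustration of how these bits are combined (a sketch, not a
// definitive list): an implicit barrier at the end of a worksharing 'for'
// would be described by
//
//   unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR; // 0x42
//
// while an explicit '#pragma omp barrier' uses
//
//   unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_EXPL;     // 0x22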

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
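
// For example (a sketch of the psource encoding; the field layout follows
// the kmp.h comment above), a construct in function 'bar' of foo.c might
// carry a string like
//
//   ";foo.c;bar;10;12;;"
//
// with ";unknown;unknown;0;0;;" as the conventional fallback when no source
// location is available.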

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
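
// A sketch of how 'schedule' clauses map onto these values (illustrative,
// not exhaustive):
//
//   schedule(static, 4)                   -> OMP_sch_static_chunked
//   schedule(dynamic)                     -> OMP_sch_dynamic_chunked
//   schedule(static) on an 'ordered' loop -> OMP_ord_static
//   schedule(nonmonotonic: dynamic)       -> OMP_sch_dynamic_chunked |
//                                            OMP_sch_modifier_nonmonotonic
//   dist_schedule(static, 8)              -> OMP_dist_sch_static_chunked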

/// A basic class for pre- and post-actions used in advanced codegen
/// sequences for OpenMP regions.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
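
// A pseudo-C sketch of the loop emitted above (the source element only
// participates when a declare-reduction initializer is used):
//
//   T *dest = DestBegin, *end = DestBegin + NumElements;
//   if (dest == end) goto done;
//   do {
//     init(*dest /*, *src */); // reduction initializer or EmitAnyExprToMem
//     ++dest; /* ++src; */
//   } while (dest != end);
//   done:;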

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
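
// A sketch of the size computation above for an array section 'a[lo : len]'
// (illustrative): .first points at a[lo] and .second at the last element of
// the section, so
//
//   Size        = (&a[lo + len - 1] - &a[lo]) + 1; // == len elements
//   SizeInChars = Size * sizeof(a[0]);
//
// For other variably modified items the division runs the other way:
// Size = SizeInChars / sizeof(element).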

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
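
// A sketch of what the loop above builds when BaseTy is 'T **' and ElTy is
// 'T' (names are illustrative):
//
//   T  *tmp1 = (T *)Addr; // innermost CreateMemTemp, filled last
//   T **tmp0 = &tmp1;     // MostTopTmp; its address is returned
//
// so the returned address has the same pointee shape as the original base
// and dereferences down to the adjusted private pointer.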

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
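
// For example (illustrative): an exception thrown inside
//
//   #pragma omp parallel
//   { throw 0; } // never caught within the region
//
// must not unwind out of the structured block, so it reaches the terminate
// scope pushed above instead of propagating past the region boundary.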

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' was specified.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
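
// For example (a sketch), given
//
//   #pragma omp declare reduction(mymax : int :
//       omp_out = omp_out > omp_in ? omp_out : omp_in)
//       initializer(omp_priv = INT_MIN)
//
// the combiner helper evaluates 'omp_out = ...' with omp_in/omp_out
// privatized to the two pointer parameters, and the initializer helper does
// the same for 'omp_priv = INT_MIN' with omp_orig/omp_priv.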

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
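
// The generated outlined function has the usual microtask shape (sketched;
// the trailing parameters depend on the captured variables):
//
//   void <helper-name>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                      /* captures / context */...);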

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1327 
1328 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1329                                              bool AtCurrentPoint) {
1330   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1331   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1332 
1333   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1334   if (AtCurrentPoint) {
1335     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1336         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1337   } else {
1338     Elem.second.ServiceInsertPt =
1339         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1340     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1341   }
1342 }
1343 
1344 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1345   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1346   if (Elem.second.ServiceInsertPt) {
1347     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1348     Elem.second.ServiceInsertPt = nullptr;
1349     Ptr->eraseFromParent();
1350   }
1351 }
1352 
1353 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1354                                                   SourceLocation Loc,
1355                                                   SmallString<128> &Buffer) {
1356   llvm::raw_svector_ostream OS(Buffer);
1357   // Build the debug location string.
1358   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1359   OS << ";" << PLoc.getFilename() << ";";
1360   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1361     OS << FD->getQualifiedNameAsString();
1362   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1363   return OS.str();
1364 }
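// Example derived from the stream writes above: a construct in function "foo"
// at line 12, column 3 of "test.c" yields the ident string
//   ";test.c;foo;12;3;;"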
1365 
1366 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1367                                                  SourceLocation Loc,
1368                                                  unsigned Flags, bool EmitLoc) {
1369   uint32_t SrcLocStrSize;
1370   llvm::Constant *SrcLocStr;
1371   if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1372                        llvm::codegenoptions::NoDebugInfo) ||
1373       Loc.isInvalid()) {
1374     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1375   } else {
1376     std::string FunctionName;
1377     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1378       FunctionName = FD->getQualifiedNameAsString();
1379     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1380     const char *FileName = PLoc.getFilename();
1381     unsigned Line = PLoc.getLine();
1382     unsigned Column = PLoc.getColumn();
1383     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1384                                                 Column, SrcLocStrSize);
1385   }
1386   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1387   return OMPBuilder.getOrCreateIdent(
1388       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1389 }
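// For reference, the ident_t the returned pointer designates has roughly this
// layout in the runtime (a sketch of the kmp.h definition, not a normative
// declaration):
//   typedef struct ident {
//     kmp_int32 reserved_1;
//     kmp_int32 flags;     // e.g. OMP_IDENT_BARRIER_IMPL_FOR
//     kmp_int32 reserved_2;
//     kmp_int32 reserved_3;
//     char const *psource; // e.g. ";test.c;foo;12;3;;"
//   } ident_t;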
1390 
1391 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1392                                           SourceLocation Loc) {
1393   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1394   // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
1395   // as the clang invariants used below might be broken.
1396   if (CGM.getLangOpts().OpenMPIRBuilder) {
1397     SmallString<128> Buffer;
1398     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1399     uint32_t SrcLocStrSize;
1400     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1401         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1402     return OMPBuilder.getOrCreateThreadID(
1403         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1404   }
1405 
1406   llvm::Value *ThreadID = nullptr;
1407   // Check whether we've already cached a load of the thread id in this
1408   // function.
1409   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1410   if (I != OpenMPLocThreadIDMap.end()) {
1411     ThreadID = I->second.ThreadID;
1412     if (ThreadID != nullptr)
1413       return ThreadID;
1414   }
1415   // If exceptions are enabled, do not use parameter to avoid possible crash.
1416   if (auto *OMPRegionInfo =
1417           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1418     if (OMPRegionInfo->getThreadIDVariable()) {
1419       // Check if this is an outlined function with thread id passed as argument.
1420       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1421       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1422       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1423           !CGF.getLangOpts().CXXExceptions ||
1424           CGF.Builder.GetInsertBlock() == TopBlock ||
1425           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1426           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1427               TopBlock ||
1428           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429               CGF.Builder.GetInsertBlock()) {
1430         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1431         // If the value was loaded in the entry block, cache it and use it
1432         // everywhere in the function.
1433         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1434           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1435           Elem.second.ThreadID = ThreadID;
1436         }
1437         return ThreadID;
1438       }
1439     }
1440   }
1441 
1442   // This is not an outlined function region - need to call kmp_int32
1443   // __kmpc_global_thread_num(ident_t *loc).
1444   // Generate thread id value and cache this value for use across the
1445   // function.
1446   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447   if (!Elem.second.ServiceInsertPt)
1448     setLocThreadIdInsertPt(CGF);
1449   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1451   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1452   llvm::CallInst *Call = CGF.Builder.CreateCall(
1453       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1454                                             OMPRTL___kmpc_global_thread_num),
1455       emitUpdateLocation(CGF, Loc));
1456   Call->setCallingConv(CGF.getRuntimeCC());
1457   Elem.second.ThreadID = Call;
1458   return Call;
1459 }
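// In the non-OpenMPIRBuilder path this emits, at the service insert point, IR
// roughly like the following (value names are illustrative):
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// and caches the result in OpenMPLocThreadIDMap so that later queries in the
// same function reuse it.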
1460 
1461 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1462   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1464     clearLocThreadIdInsertPt(CGF);
1465     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466   }
1467   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468     for (const auto *D : FunctionUDRMap[CGF.CurFn])
1469       UDRMap.erase(D);
1470     FunctionUDRMap.erase(CGF.CurFn);
1471   }
1472   auto I = FunctionUDMMap.find(CGF.CurFn);
1473   if (I != FunctionUDMMap.end()) {
1474     for (const auto *D : I->second)
1475       UDMMap.erase(D);
1476     FunctionUDMMap.erase(I);
1477   }
1478   LastprivateConditionalToTypes.erase(CGF.CurFn);
1479   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1480 }
1481 
1482 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1483   return OMPBuilder.IdentPtr;
1484 }
1485 
1486 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1487   if (!Kmpc_MicroTy) {
1488     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492   }
1493   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494 }
1495 
1496 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1497 convertDeviceClause(const VarDecl *VD) {
1498   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500   if (!DevTy)
1501     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502 
1503   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504   case OMPDeclareTargetDeclAttr::DT_Host:
1505     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1507   case OMPDeclareTargetDeclAttr::DT_NoHost:
1508     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1510   case OMPDeclareTargetDeclAttr::DT_Any:
1511     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1513   default:
1514     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1516   }
1517 }
1518 
1519 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1520 convertCaptureClause(const VarDecl *VD) {
1521   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523   if (!MapType)
1524     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1529   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1532   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1535   default:
1536     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1538   }
1539 }
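// Example mapping implied by the switch above (a sketch):
//   #pragma omp declare target to(x)    -> OMPTargetGlobalVarEntryTo
//   #pragma omp declare target enter(x) -> OMPTargetGlobalVarEntryEnter
//   #pragma omp declare target link(x)  -> OMPTargetGlobalVarEntryLink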
1540 
1541 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544 
1545   auto FileInfoCallBack = [&]() {
1546     SourceManager &SM = CGM.getContext().getSourceManager();
1547     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548 
1549     llvm::sys::fs::UniqueID ID;
1550     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552     }
1553 
1554     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555   };
1556 
1557   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558 }
1559 
1560 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1561   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562 
1563   auto LinkageForVariable = [&VD, this]() {
1564     return CGM.getLLVMLinkageVarDefinition(VD);
1565   };
1566 
1567   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568 
1569   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1570       CGM.getContext().getPointerType(VD->getType()));
1571   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1572       convertCaptureClause(VD), convertDeviceClause(VD),
1573       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1574       VD->isExternallyVisible(),
1575       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1576                                   VD->getCanonicalDecl()->getBeginLoc()),
1577       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579       LinkageForVariable);
1580 
1581   if (!addr)
1582     return Address::invalid();
1583   return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584 }
1585 
1586 llvm::Constant *
1587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1588   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589          !CGM.getContext().getTargetInfo().isTLSSupported());
1590   // Lookup the entry, lazily creating it if necessary.
1591   std::string Suffix = getName({"cache", ""});
1592   return OMPBuilder.getOrCreateInternalVariable(
1593       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594 }
1595 
1596 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1597                                                 const VarDecl *VD,
1598                                                 Address VDAddr,
1599                                                 SourceLocation Loc) {
1600   if (CGM.getLangOpts().OpenMPUseTLS &&
1601       CGM.getContext().getTargetInfo().isTLSSupported())
1602     return VDAddr;
1603 
1604   llvm::Type *VarTy = VDAddr.getElementType();
1605   llvm::Value *Args[] = {
1606       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1608       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1609       getOrCreateThreadPrivateCache(VD)};
1610   return Address(
1611       CGF.EmitRuntimeCall(
1612           OMPBuilder.getOrCreateRuntimeFunction(
1613               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614           Args),
1615       CGF.Int8Ty, VDAddr.getAlignment());
1616 }
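// Example: given
//   static int X;
//   #pragma omp threadprivate(X)
// an access to X (when native TLS is not used) becomes roughly
//   (int *)__kmpc_threadprivate_cached(&loc, gtid, &X, sizeof(X), &Cache);
// where "Cache" stands for the internal cache variable created by
// getOrCreateThreadPrivateCache above (a sketch, not the exact mangled name).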
1617 
1618 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1619     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621   // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
1622   // runtime library.
1623   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626                       OMPLoc);
1627   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628   // to register constructor/destructor for variable.
1629   llvm::Value *Args[] = {
1630       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1631       Ctor, CopyCtor, Dtor};
1632   CGF.EmitRuntimeCall(
1633       OMPBuilder.getOrCreateRuntimeFunction(
1634           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1635       Args);
1636 }
1637 
1638 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1639     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1640     bool PerformInit, CodeGenFunction *CGF) {
1641   if (CGM.getLangOpts().OpenMPUseTLS &&
1642       CGM.getContext().getTargetInfo().isTLSSupported())
1643     return nullptr;
1644 
1645   VD = VD->getDefinition(CGM.getContext());
1646   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1647     QualType ASTTy = VD->getType();
1648 
1649     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1650     const Expr *Init = VD->getAnyInitializer();
1651     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1652       // Generate function that re-emits the declaration's initializer into the
1653       // threadprivate copy of the variable VD
1654       CodeGenFunction CtorCGF(CGM);
1655       FunctionArgList Args;
1656       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1657                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1658                             ImplicitParamKind::Other);
1659       Args.push_back(&Dst);
1660 
1661       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1662           CGM.getContext().VoidPtrTy, Args);
1663       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1664       std::string Name = getName({"__kmpc_global_ctor_", ""});
1665       llvm::Function *Fn =
1666           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1667       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1668                             Args, Loc, Loc);
1669       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1670           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671           CGM.getContext().VoidPtrTy, Dst.getLocation());
1672       Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1673                   VDAddr.getAlignment());
1674       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1675                                /*IsInitializer=*/true);
1676       ArgVal = CtorCGF.EmitLoadOfScalar(
1677           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1678           CGM.getContext().VoidPtrTy, Dst.getLocation());
1679       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1680       CtorCGF.FinishFunction();
1681       Ctor = Fn;
1682     }
1683     if (VD->getType().isDestructedType() != QualType::DK_none) {
1684       // Generate function that emits destructor call for the threadprivate copy
1685       // of the variable VD
1686       CodeGenFunction DtorCGF(CGM);
1687       FunctionArgList Args;
1688       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1689                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1690                             ImplicitParamKind::Other);
1691       Args.push_back(&Dst);
1692 
1693       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1694           CGM.getContext().VoidTy, Args);
1695       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1696       std::string Name = getName({"__kmpc_global_dtor_", ""});
1697       llvm::Function *Fn =
1698           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1699       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1700       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1701                             Loc, Loc);
1702       // Create a scope with an artificial location for the body of this function.
1703       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1704       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1705           DtorCGF.GetAddrOfLocalVar(&Dst),
1706           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1707       DtorCGF.emitDestroy(
1708           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1709           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1710           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1711       DtorCGF.FinishFunction();
1712       Dtor = Fn;
1713     }
1714     // Do not emit init function if it is not required.
1715     if (!Ctor && !Dtor)
1716       return nullptr;
1717 
1718     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1719     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1720                                                /*isVarArg=*/false)
1721                            ->getPointerTo();
1722     // Copying constructor for the threadprivate variable.
1723     // Must be NULL - reserved by the runtime, which currently requires that
1724     // this parameter always be NULL; otherwise it fires an assertion.
1725     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1726     if (Ctor == nullptr) {
1727       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1728                                              /*isVarArg=*/false)
1729                          ->getPointerTo();
1730       Ctor = llvm::Constant::getNullValue(CtorTy);
1731     }
1732     if (Dtor == nullptr) {
1733       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1734                                              /*isVarArg=*/false)
1735                          ->getPointerTo();
1736       Dtor = llvm::Constant::getNullValue(DtorTy);
1737     }
1738     if (!CGF) {
1739       auto *InitFunctionTy =
1740           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1741       std::string Name = getName({"__omp_threadprivate_init_", ""});
1742       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1743           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1744       CodeGenFunction InitCGF(CGM);
1745       FunctionArgList ArgList;
1746       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1747                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1748                             Loc, Loc);
1749       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1750       InitCGF.FinishFunction();
1751       return InitFunction;
1752     }
1753     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1754   }
1755   return nullptr;
1756 }
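// For a C++ threadprivate variable with a non-trivial constructor or
// destructor, the initializer emitted when no CGF is supplied corresponds
// roughly to (a sketch):
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc); // make sure the runtime is initialized
//     __kmpc_threadprivate_register(&loc, &Var, Ctor, /*CopyCtor=*/NULL, Dtor);
//   }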
1757 
1758 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1759                                                 llvm::GlobalValue *GV) {
1760   std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1761       OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1762 
1763   // We only need to handle active 'indirect' declare target functions.
1764   if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1765     return;
1766 
1767   // Get a mangled name to store the new device global in.
1768   llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1769       CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1770   SmallString<128> Name;
1771   OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1772 
1773   // We need to generate a new global to hold the address of the indirectly
1774   // called device function. Doing this allows us to keep the visibility and
1775   // linkage of the associated function unchanged while allowing the runtime to
1776   // access its value.
1777   llvm::GlobalValue *Addr = GV;
1778   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1779     Addr = new llvm::GlobalVariable(
1780         CGM.getModule(), CGM.VoidPtrTy,
1781         /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1782         nullptr, llvm::GlobalValue::NotThreadLocal,
1783         CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1784     Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1785   }
1786 
1787   OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1788       Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1789       llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1790       llvm::GlobalValue::WeakODRLinkage);
1791 }
1792 
1793 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1794                                                           QualType VarType,
1795                                                           StringRef Name) {
1796   std::string Suffix = getName({"artificial", ""});
1797   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1798   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1799       VarLVType, Twine(Name).concat(Suffix).str());
1800   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1801       CGM.getTarget().isTLSSupported()) {
1802     GAddr->setThreadLocal(/*Val=*/true);
1803     return Address(GAddr, GAddr->getValueType(),
1804                    CGM.getContext().getTypeAlignInChars(VarType));
1805   }
1806   std::string CacheSuffix = getName({"cache", ""});
1807   llvm::Value *Args[] = {
1808       emitUpdateLocation(CGF, SourceLocation()),
1809       getThreadID(CGF, SourceLocation()),
1810       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1811       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1812                                 /*isSigned=*/false),
1813       OMPBuilder.getOrCreateInternalVariable(
1814           CGM.VoidPtrPtrTy,
1815           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1816   return Address(
1817       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1818           CGF.EmitRuntimeCall(
1819               OMPBuilder.getOrCreateRuntimeFunction(
1820                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1821               Args),
1822           VarLVType->getPointerTo(/*AddrSpace=*/0)),
1823       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1824 }
1825 
1826 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1827                                    const RegionCodeGenTy &ThenGen,
1828                                    const RegionCodeGenTy &ElseGen) {
1829   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1830 
1831   // If the condition constant folds and can be elided, try to avoid emitting
1832   // the condition and the dead arm of the if/else.
1833   bool CondConstant;
1834   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1835     if (CondConstant)
1836       ThenGen(CGF);
1837     else
1838       ElseGen(CGF);
1839     return;
1840   }
1841 
1842   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1843   // emit the conditional branch.
1844   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1845   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1846   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1847   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1848 
1849   // Emit the 'then' code.
1850   CGF.EmitBlock(ThenBlock);
1851   ThenGen(CGF);
1852   CGF.EmitBranch(ContBlock);
1853   // Emit the 'else' code if present.
1854   // There is no need to emit line number for unconditional branch.
1855   (void)ApplyDebugLocation::CreateEmpty(CGF);
1856   CGF.EmitBlock(ElseBlock);
1857   ElseGen(CGF);
1858   // There is no need to emit line number for unconditional branch.
1859   (void)ApplyDebugLocation::CreateEmpty(CGF);
1860   CGF.EmitBranch(ContBlock);
1861   // Emit the continuation block for code after the if.
1862   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1863 }
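// When the condition does not fold, the emitted control flow is the usual
// three-block diamond (a sketch):
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:  <ThenGen>  br label %omp_if.end
// omp_if.else:  <ElseGen>  br label %omp_if.end
// omp_if.end: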
1864 
1865 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1866                                        llvm::Function *OutlinedFn,
1867                                        ArrayRef<llvm::Value *> CapturedVars,
1868                                        const Expr *IfCond,
1869                                        llvm::Value *NumThreads) {
1870   if (!CGF.HaveInsertPoint())
1871     return;
1872   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1873   auto &M = CGM.getModule();
1874   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1875                     this](CodeGenFunction &CGF, PrePostActionTy &) {
1876     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1877     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1878     llvm::Value *Args[] = {
1879         RTLoc,
1880         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1881         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1882     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1883     RealArgs.append(std::begin(Args), std::end(Args));
1884     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1885 
1886     llvm::FunctionCallee RTLFn =
1887         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1888     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1889   };
1890   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1891                     this](CodeGenFunction &CGF, PrePostActionTy &) {
1892     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1893     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1894     // Build calls:
1895     // __kmpc_serialized_parallel(&Loc, GTid);
1896     llvm::Value *Args[] = {RTLoc, ThreadID};
1897     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1898                             M, OMPRTL___kmpc_serialized_parallel),
1899                         Args);
1900 
1901     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1902     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1903     Address ZeroAddrBound =
1904         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1905                                          /*Name=*/".bound.zero.addr");
1906     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1907     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1908     // ThreadId for serialized parallels is 0.
1909     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1910     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1911     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1912 
1913     // Ensure we do not inline the function. This is trivially true for the ones
1914     // passed to __kmpc_fork_call but the ones called in serialized regions
1915     // could be inlined. This is not perfect, but it is closer to the invariant
1916     // we want, namely, every data environment starts with a new function.
1917     // TODO: We should pass the if condition to the runtime function and do the
1918     //       handling there. Much cleaner code.
1919     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1920     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1921     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1922 
1923     // __kmpc_end_serialized_parallel(&Loc, GTid);
1924     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1925     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1926                             M, OMPRTL___kmpc_end_serialized_parallel),
1927                         EndArgs);
1928   };
1929   if (IfCond) {
1930     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1931   } else {
1932     RegionCodeGenTy ThenRCG(ThenGen);
1933     ThenRCG(CGF);
1934   }
1935 }
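// Putting the two arms together, for
//   #pragma omp parallel if(c)
// the emitted code corresponds roughly to (a sketch; "outlined" is the
// function produced by emitParallelOutlinedFunction):
//   if (c) {
//     __kmpc_fork_call(&loc, /*argc=*/n, (kmpc_micro)outlined, vars...);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &zero_bound, vars...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }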
1936 
1937 // If we're inside an (outlined) parallel region, use the region info's
1938 // thread-ID variable (it is passed as the first argument of the outlined
1939 // function, as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
1940 // code region rather than a parallel region, get the thread ID by calling
1941 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1942 // temporary, and return the address of that temporary.
1943 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1944                                              SourceLocation Loc) {
1945   if (auto *OMPRegionInfo =
1946           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1947     if (OMPRegionInfo->getThreadIDVariable())
1948       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1949 
1950   llvm::Value *ThreadID = getThreadID(CGF, Loc);
1951   QualType Int32Ty =
1952       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1953   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1954   CGF.EmitStoreOfScalar(ThreadID,
1955                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1956 
1957   return ThreadIDTemp;
1958 }
1959 
1960 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1961   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1962   std::string Name = getName({Prefix, "var"});
1963   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1964 }
1965 
1966 namespace {
1967 /// Common pre(post)-action for different OpenMP constructs.
1968 class CommonActionTy final : public PrePostActionTy {
1969   llvm::FunctionCallee EnterCallee;
1970   ArrayRef<llvm::Value *> EnterArgs;
1971   llvm::FunctionCallee ExitCallee;
1972   ArrayRef<llvm::Value *> ExitArgs;
1973   bool Conditional;
1974   llvm::BasicBlock *ContBlock = nullptr;
1975 
1976 public:
1977   CommonActionTy(llvm::FunctionCallee EnterCallee,
1978                  ArrayRef<llvm::Value *> EnterArgs,
1979                  llvm::FunctionCallee ExitCallee,
1980                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1981       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1982         ExitArgs(ExitArgs), Conditional(Conditional) {}
1983   void Enter(CodeGenFunction &CGF) override {
1984     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1985     if (Conditional) {
1986       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1987       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1988       ContBlock = CGF.createBasicBlock("omp_if.end");
1989       // Generate the branch (If-stmt)
1990       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1991       CGF.EmitBlock(ThenBlock);
1992     }
1993   }
1994   void Done(CodeGenFunction &CGF) {
1995     // Emit the rest of blocks/branches
1996     CGF.EmitBranch(ContBlock);
1997     CGF.EmitBlock(ContBlock, true);
1998   }
1999   void Exit(CodeGenFunction &CGF) override {
2000     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2001   }
2002 };
2003 } // anonymous namespace
2004 
2005 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2006                                          StringRef CriticalName,
2007                                          const RegionCodeGenTy &CriticalOpGen,
2008                                          SourceLocation Loc, const Expr *Hint) {
2009   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2010   // CriticalOpGen();
2011   // __kmpc_end_critical(ident_t *, gtid, Lock);
2012   // Prepare arguments and build a call to __kmpc_critical
2013   if (!CGF.HaveInsertPoint())
2014     return;
2015   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2016                          getCriticalRegionLock(CriticalName)};
2017   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2018                                                 std::end(Args));
2019   if (Hint) {
2020     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2021         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2022   }
2023   CommonActionTy Action(
2024       OMPBuilder.getOrCreateRuntimeFunction(
2025           CGM.getModule(),
2026           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2027       EnterArgs,
2028       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2029                                             OMPRTL___kmpc_end_critical),
2030       Args);
2031   CriticalOpGen.setAction(Action);
2032   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2033 }
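// Example: for
//   #pragma omp critical(lck) hint(h)
//   { <body>; }
// the emitted sequence is roughly (a sketch; the lock name follows
// getCriticalRegionLock above):
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_lck.var, h);
//   <body>;
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_lck.var);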
2034 
2035 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2036                                        const RegionCodeGenTy &MasterOpGen,
2037                                        SourceLocation Loc) {
2038   if (!CGF.HaveInsertPoint())
2039     return;
2040   // if(__kmpc_master(ident_t *, gtid)) {
2041   //   MasterOpGen();
2042   //   __kmpc_end_master(ident_t *, gtid);
2043   // }
2044   // Prepare arguments and build a call to __kmpc_master
2045   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2046   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2047                             CGM.getModule(), OMPRTL___kmpc_master),
2048                         Args,
2049                         OMPBuilder.getOrCreateRuntimeFunction(
2050                             CGM.getModule(), OMPRTL___kmpc_end_master),
2051                         Args,
2052                         /*Conditional=*/true);
2053   MasterOpGen.setAction(Action);
2054   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2055   Action.Done(CGF);
2056 }
2057 
2058 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2059                                        const RegionCodeGenTy &MaskedOpGen,
2060                                        SourceLocation Loc, const Expr *Filter) {
2061   if (!CGF.HaveInsertPoint())
2062     return;
2063   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2064   //   MaskedOpGen();
2065   //   __kmpc_end_masked(ident_t *, gtid);
2066   // }
2067   // Prepare arguments and build a call to __kmpc_masked
2068   llvm::Value *FilterVal = Filter
2069                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2070                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2071   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2072                          FilterVal};
2073   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2074                             getThreadID(CGF, Loc)};
2075   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2076                             CGM.getModule(), OMPRTL___kmpc_masked),
2077                         Args,
2078                         OMPBuilder.getOrCreateRuntimeFunction(
2079                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2080                         ArgsEnd,
2081                         /*Conditional=*/true);
2082   MaskedOpGen.setAction(Action);
2083   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2084   Action.Done(CGF);
2085 }
2086 
2087 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2088                                         SourceLocation Loc) {
2089   if (!CGF.HaveInsertPoint())
2090     return;
2091   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2092     OMPBuilder.createTaskyield(CGF.Builder);
2093   } else {
2094     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2095     llvm::Value *Args[] = {
2096         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2097         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2098     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2100                         Args);
2101   }
2102 
2103   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2104     Region->emitUntiedSwitch(CGF);
2105 }
2106 
2107 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2108                                           const RegionCodeGenTy &TaskgroupOpGen,
2109                                           SourceLocation Loc) {
2110   if (!CGF.HaveInsertPoint())
2111     return;
2112   // __kmpc_taskgroup(ident_t *, gtid);
2113   // TaskgroupOpGen();
2114   // __kmpc_end_taskgroup(ident_t *, gtid);
2115   // Prepare arguments and build a call to __kmpc_taskgroup
2116   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2117   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2118                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2119                         Args,
2120                         OMPBuilder.getOrCreateRuntimeFunction(
2121                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2122                         Args);
2123   TaskgroupOpGen.setAction(Action);
2124   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2125 }
2126 
2127 /// Given an array of pointers to variables, project the address of a
2128 /// given variable.
2129 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2130                                       unsigned Index, const VarDecl *Var) {
2131   // Pull out the pointer to the variable.
2132   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2133   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2134 
2135   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2136   return Address(
2137       CGF.Builder.CreateBitCast(
2138           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2139       ElemTy, CGF.getContext().getDeclAlign(Var));
2140 }
2141 
2142 static llvm::Value *emitCopyprivateCopyFunction(
2143     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2144     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2145     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2146     SourceLocation Loc) {
2147   ASTContext &C = CGM.getContext();
2148   // void copy_func(void *LHSArg, void *RHSArg);
2149   FunctionArgList Args;
2150   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2151                            ImplicitParamKind::Other);
2152   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2153                            ImplicitParamKind::Other);
2154   Args.push_back(&LHSArg);
2155   Args.push_back(&RHSArg);
2156   const auto &CGFI =
2157       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2158   std::string Name =
2159       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2160   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2161                                     llvm::GlobalValue::InternalLinkage, Name,
2162                                     &CGM.getModule());
2163   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2164   Fn->setDoesNotRecurse();
2165   CodeGenFunction CGF(CGM);
2166   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2167   // Dest = (void*[n])(LHSArg);
2168   // Src = (void*[n])(RHSArg);
2169   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2170                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2171                   ArgsElemType->getPointerTo()),
2172               ArgsElemType, CGF.getPointerAlign());
2173   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2174                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2175                   ArgsElemType->getPointerTo()),
2176               ArgsElemType, CGF.getPointerAlign());
2177   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2178   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2179   // ...
2180   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2181   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2182     const auto *DestVar =
2183         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2184     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2185 
2186     const auto *SrcVar =
2187         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2188     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2189 
2190     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2191     QualType Type = VD->getType();
2192     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2193   }
2194   CGF.FinishFunction();
2195   return Fn;
2196 }
2197 
2198 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2199                                        const RegionCodeGenTy &SingleOpGen,
2200                                        SourceLocation Loc,
2201                                        ArrayRef<const Expr *> CopyprivateVars,
2202                                        ArrayRef<const Expr *> SrcExprs,
2203                                        ArrayRef<const Expr *> DstExprs,
2204                                        ArrayRef<const Expr *> AssignmentOps) {
2205   if (!CGF.HaveInsertPoint())
2206     return;
2207   assert(CopyprivateVars.size() == SrcExprs.size() &&
2208          CopyprivateVars.size() == DstExprs.size() &&
2209          CopyprivateVars.size() == AssignmentOps.size());
2210   ASTContext &C = CGM.getContext();
2211   // int32 did_it = 0;
2212   // if(__kmpc_single(ident_t *, gtid)) {
2213   //   SingleOpGen();
2214   //   __kmpc_end_single(ident_t *, gtid);
2215   //   did_it = 1;
2216   // }
2217   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2218   // <copy_func>, did_it);
2219 
2220   Address DidIt = Address::invalid();
2221   if (!CopyprivateVars.empty()) {
2222     // int32 did_it = 0;
2223     QualType KmpInt32Ty =
2224         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2225     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2226     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2227   }
2228   // Prepare arguments and build a call to __kmpc_single
2229   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2230   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2231                             CGM.getModule(), OMPRTL___kmpc_single),
2232                         Args,
2233                         OMPBuilder.getOrCreateRuntimeFunction(
2234                             CGM.getModule(), OMPRTL___kmpc_end_single),
2235                         Args,
2236                         /*Conditional=*/true);
2237   SingleOpGen.setAction(Action);
2238   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2239   if (DidIt.isValid()) {
2240     // did_it = 1;
2241     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2242   }
2243   Action.Done(CGF);
2244   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2245   // <copy_func>, did_it);
2246   if (DidIt.isValid()) {
2247     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2248     QualType CopyprivateArrayTy = C.getConstantArrayType(
2249         C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2250         /*IndexTypeQuals=*/0);
2251     // Create a list of all private variables for copyprivate.
2252     Address CopyprivateList =
2253         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2254     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2255       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2256       CGF.Builder.CreateStore(
2257           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2258               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2259               CGF.VoidPtrTy),
2260           Elem);
2261     }
2262     // Build function that copies private values from single region to all other
2263     // threads in the corresponding parallel region.
2264     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2265         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2266         SrcExprs, DstExprs, AssignmentOps, Loc);
2267     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2268     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2269         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2270     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2271     llvm::Value *Args[] = {
2272         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2273         getThreadID(CGF, Loc),        // i32 <gtid>
2274         BufSize,                      // size_t <buf_size>
2275         CL.getPointer(),              // void *<copyprivate list>
2276         CpyFn,                        // void (*) (void *, void *) <copy_func>
2277         DidItVal                      // i32 did_it
2278     };
2279     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2280                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2281                         Args);
2282   }
2283 }
2284 
2285 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2286                                         const RegionCodeGenTy &OrderedOpGen,
2287                                         SourceLocation Loc, bool IsThreads) {
2288   if (!CGF.HaveInsertPoint())
2289     return;
2290   // __kmpc_ordered(ident_t *, gtid);
2291   // OrderedOpGen();
2292   // __kmpc_end_ordered(ident_t *, gtid);
2293   // Prepare arguments and build a call to __kmpc_ordered
2294   if (IsThreads) {
2295     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297                               CGM.getModule(), OMPRTL___kmpc_ordered),
2298                           Args,
2299                           OMPBuilder.getOrCreateRuntimeFunction(
2300                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2301                           Args);
2302     OrderedOpGen.setAction(Action);
2303     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2304     return;
2305   }
2306   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307 }
2308 
2309 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2310   unsigned Flags;
2311   if (Kind == OMPD_for)
2312     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2313   else if (Kind == OMPD_sections)
2314     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2315   else if (Kind == OMPD_single)
2316     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2317   else if (Kind == OMPD_barrier)
2318     Flags = OMP_IDENT_BARRIER_EXPL;
2319   else
2320     Flags = OMP_IDENT_BARRIER_IMPL;
2321   return Flags;
2322 }
2323 
2324 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2325     CodeGenFunction &CGF, const OMPLoopDirective &S,
2326     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2327   // Check if the loop directive is actually a doacross loop directive. In that
2328   // case, choose the 'static, 1' schedule.
2329   if (llvm::any_of(
2330           S.getClausesOfKind<OMPOrderedClause>(),
2331           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2332     ScheduleKind = OMPC_SCHEDULE_static;
2333     // Chunk size is 1 in this case.
2334     llvm::APInt ChunkSize(32, 1);
2335     ChunkExpr = IntegerLiteral::Create(
2336         CGF.getContext(), ChunkSize,
2337         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2338         SourceLocation());
2339   }
2340 }
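// Example: a doacross loop such as
//   #pragma omp for ordered(2)
// is therefore lowered as if schedule(static, 1) had been written.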
2341 
2342 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2343                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2344                                       bool ForceSimpleCall) {
2345   // Check if we should use the OMPBuilder
2346   auto *OMPRegionInfo =
2347       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2348   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2349     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2350         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2351     return;
2352   }
2353 
2354   if (!CGF.HaveInsertPoint())
2355     return;
2358   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2359   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2360   // thread_id);
2361   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2362                          getThreadID(CGF, Loc)};
2363   if (OMPRegionInfo) {
2364     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2365       llvm::Value *Result = CGF.EmitRuntimeCall(
2366           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2367                                                 OMPRTL___kmpc_cancel_barrier),
2368           Args);
2369       if (EmitChecks) {
2370         // if (__kmpc_cancel_barrier()) {
2371         //   exit from construct;
2372         // }
2373         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2374         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2375         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2376         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2377         CGF.EmitBlock(ExitBB);
2378         //   exit from construct;
2379         CodeGenFunction::JumpDest CancelDestination =
2380             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2381         CGF.EmitBranchThroughCleanup(CancelDestination);
2382         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2383       }
2384       return;
2385     }
2386   }
2387   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2388                           CGM.getModule(), OMPRTL___kmpc_barrier),
2389                       Args);
2390 }
2391 
2392 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2393                                     Expr *ME, bool IsFatal) {
2394   llvm::Value *MVL =
2395       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2396          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2397   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2398   // *message)
2399   llvm::Value *Args[] = {
2400       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2401       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2402       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2403   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2404                           CGM.getModule(), OMPRTL___kmpc_error),
2405                       Args);
2406 }
2407 
2408 /// Map the OpenMP loop schedule to the runtime enumeration.
2409 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2410                                           bool Chunked, bool Ordered) {
2411   switch (ScheduleKind) {
2412   case OMPC_SCHEDULE_static:
2413     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2414                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2415   case OMPC_SCHEDULE_dynamic:
2416     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2417   case OMPC_SCHEDULE_guided:
2418     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2419   case OMPC_SCHEDULE_runtime:
2420     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2421   case OMPC_SCHEDULE_auto:
2422     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2423   case OMPC_SCHEDULE_unknown:
2424     assert(!Chunked && "chunk was specified but schedule kind not known");
2425     return Ordered ? OMP_ord_static : OMP_sch_static;
2426   }
2427   llvm_unreachable("Unexpected runtime schedule");
2428 }
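// Example mappings implied by the switch above for unordered loops:
//   schedule(static)      -> OMP_sch_static
//   schedule(static, 4)   -> OMP_sch_static_chunked
//   schedule(dynamic, 4)  -> OMP_sch_dynamic_chunked
//   schedule(guided)      -> OMP_sch_guided_chunked
//   schedule(runtime)     -> OMP_sch_runtime
//   schedule(auto)        -> OMP_sch_auto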
2429 
2430 /// Map the OpenMP distribute schedule to the runtime enumeration.
2431 static OpenMPSchedType
2432 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2433   // Only the static schedule kind is allowed for dist_schedule.
2434   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2435 }
2436 
2437 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2438                                          bool Chunked) const {
2439   OpenMPSchedType Schedule =
2440       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2441   return Schedule == OMP_sch_static;
2442 }
2443 
2444 bool CGOpenMPRuntime::isStaticNonchunked(
2445     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2446   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2447   return Schedule == OMP_dist_sch_static;
2448 }
2449 
2450 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2451                                       bool Chunked) const {
2452   OpenMPSchedType Schedule =
2453       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2454   return Schedule == OMP_sch_static_chunked;
2455 }
2456 
2457 bool CGOpenMPRuntime::isStaticChunked(
2458     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2459   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2460   return Schedule == OMP_dist_sch_static_chunked;
2461 }
2462 
2463 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2464   OpenMPSchedType Schedule =
2465       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2466   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2467   return Schedule != OMP_sch_static;
2468 }
2469 
2470 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2471                                   OpenMPScheduleClauseModifier M1,
2472                                   OpenMPScheduleClauseModifier M2) {
2473   int Modifier = 0;
2474   switch (M1) {
2475   case OMPC_SCHEDULE_MODIFIER_monotonic:
2476     Modifier = OMP_sch_modifier_monotonic;
2477     break;
2478   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2479     Modifier = OMP_sch_modifier_nonmonotonic;
2480     break;
2481   case OMPC_SCHEDULE_MODIFIER_simd:
2482     if (Schedule == OMP_sch_static_chunked)
2483       Schedule = OMP_sch_static_balanced_chunked;
2484     break;
2485   case OMPC_SCHEDULE_MODIFIER_last:
2486   case OMPC_SCHEDULE_MODIFIER_unknown:
2487     break;
2488   }
2489   switch (M2) {
2490   case OMPC_SCHEDULE_MODIFIER_monotonic:
2491     Modifier = OMP_sch_modifier_monotonic;
2492     break;
2493   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2494     Modifier = OMP_sch_modifier_nonmonotonic;
2495     break;
2496   case OMPC_SCHEDULE_MODIFIER_simd:
2497     if (Schedule == OMP_sch_static_chunked)
2498       Schedule = OMP_sch_static_balanced_chunked;
2499     break;
2500   case OMPC_SCHEDULE_MODIFIER_last:
2501   case OMPC_SCHEDULE_MODIFIER_unknown:
2502     break;
2503   }
2504   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2505   // If the static schedule kind is specified or if the ordered clause is
2506   // specified, and if the nonmonotonic modifier is not specified, the effect is
2507   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2508   // modifier is specified, the effect is as if the nonmonotonic modifier is
2509   // specified.
2510   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2511     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2512           Schedule == OMP_sch_static_balanced_chunked ||
2513           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2514           Schedule == OMP_dist_sch_static_chunked ||
2515           Schedule == OMP_dist_sch_static))
2516       Modifier = OMP_sch_modifier_nonmonotonic;
2517   }
2518   return Schedule | Modifier;
2519 }
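
// For example, with OpenMP >= 5.0 and no explicit modifier, a plain
// 'schedule(dynamic)' loop is treated as nonmonotonic, so this returns
// (OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic), while static
// and distribute schedules are returned without a modifier.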
2520 
2521 void CGOpenMPRuntime::emitForDispatchInit(
2522     CodeGenFunction &CGF, SourceLocation Loc,
2523     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2524     bool Ordered, const DispatchRTInput &DispatchValues) {
2525   if (!CGF.HaveInsertPoint())
2526     return;
2527   OpenMPSchedType Schedule = getRuntimeSchedule(
2528       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2529   assert(Ordered ||
2530          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2531           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2532           Schedule != OMP_sch_static_balanced_chunked));
2533   // Call __kmpc_dispatch_init(
2534   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2535   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2536   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2537 
2538   // If the chunk size was not specified in the clause, use the default value 1.
2539   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2540                                             : CGF.Builder.getIntN(IVSize, 1);
2541   llvm::Value *Args[] = {
2542       emitUpdateLocation(CGF, Loc),
2543       getThreadID(CGF, Loc),
2544       CGF.Builder.getInt32(addMonoNonMonoModifier(
2545           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2546       DispatchValues.LB,                                     // Lower
2547       DispatchValues.UB,                                     // Upper
2548       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2549       Chunk                                                  // Chunk
2550   };
2551   CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2552                       Args);
2553 }
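
// E.g., for '#pragma omp for schedule(dynamic, 4)' with a signed 32-bit
// iteration variable this emits, roughly (a sketch, operand names
// illustrative):
//   __kmpc_dispatch_init_4(&loc, tid, sched, lb, ub, /*stride=*/1,
//                          /*chunk=*/4);
// where 'sched' encodes the schedule type plus any monotonicity modifier.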
2554 
2555 static void emitForStaticInitCall(
2556     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2557     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2558     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2559     const CGOpenMPRuntime::StaticRTInput &Values) {
2560   if (!CGF.HaveInsertPoint())
2561     return;
2562 
2563   assert(!Values.Ordered);
2564   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2565          Schedule == OMP_sch_static_balanced_chunked ||
2566          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2567          Schedule == OMP_dist_sch_static ||
2568          Schedule == OMP_dist_sch_static_chunked);
2569 
2570   // Call __kmpc_for_static_init(
2571   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2572   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2573   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2574   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2575   llvm::Value *Chunk = Values.Chunk;
2576   if (Chunk == nullptr) {
2577     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2578             Schedule == OMP_dist_sch_static) &&
2579            "expected static non-chunked schedule");
2580     // If the chunk size was not specified in the clause, use the default value 1.
2581     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2582   } else {
2583     assert((Schedule == OMP_sch_static_chunked ||
2584             Schedule == OMP_sch_static_balanced_chunked ||
2585             Schedule == OMP_ord_static_chunked ||
2586             Schedule == OMP_dist_sch_static_chunked) &&
2587            "expected static chunked schedule");
2588   }
2589   llvm::Value *Args[] = {
2590       UpdateLocation,
2591       ThreadId,
2592       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2593                                                   M2)), // Schedule type
2594       Values.IL.getPointer(),                           // &isLastIter
2595       Values.LB.getPointer(),                           // &LB
2596       Values.UB.getPointer(),                           // &UB
2597       Values.ST.getPointer(),                           // &Stride
2598       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2599       Chunk                                             // Chunk
2600   };
2601   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2602 }
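
// For a signed 32-bit iteration variable the emitted call has the shape
// (a sketch, operand names illustrative):
//   __kmpc_for_static_init_4(&loc, tid, sched, &lastiter, &lb, &ub,
//                            &stride, /*incr=*/1, chunk);
// after which each thread iterates over its assigned [lb, ub] subrange.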
2603 
2604 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2605                                         SourceLocation Loc,
2606                                         OpenMPDirectiveKind DKind,
2607                                         const OpenMPScheduleTy &ScheduleKind,
2608                                         const StaticRTInput &Values) {
2609   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2610       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2611   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2612          "Expected loop-based or sections-based directive.");
2613   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2614                                              isOpenMPLoopDirective(DKind)
2615                                                  ? OMP_IDENT_WORK_LOOP
2616                                                  : OMP_IDENT_WORK_SECTIONS);
2617   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2618   llvm::FunctionCallee StaticInitFunction =
2619       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2620                                              false);
2621   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2622   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2623                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2624 }
2625 
2626 void CGOpenMPRuntime::emitDistributeStaticInit(
2627     CodeGenFunction &CGF, SourceLocation Loc,
2628     OpenMPDistScheduleClauseKind SchedKind,
2629     const CGOpenMPRuntime::StaticRTInput &Values) {
2630   OpenMPSchedType ScheduleNum =
2631       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2632   llvm::Value *UpdatedLocation =
2633       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2634   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2635   llvm::FunctionCallee StaticInitFunction;
2636   bool isGPUDistribute =
2637       CGM.getLangOpts().OpenMPIsTargetDevice &&
2638       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2639   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2640       Values.IVSize, Values.IVSigned, isGPUDistribute);
2641 
2642   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2643                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2644                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2645 }
2646 
2647 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2648                                           SourceLocation Loc,
2649                                           OpenMPDirectiveKind DKind) {
2650   if (!CGF.HaveInsertPoint())
2651     return;
2652   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2653   llvm::Value *Args[] = {
2654       emitUpdateLocation(CGF, Loc,
2655                          isOpenMPDistributeDirective(DKind)
2656                              ? OMP_IDENT_WORK_DISTRIBUTE
2657                              : isOpenMPLoopDirective(DKind)
2658                                    ? OMP_IDENT_WORK_LOOP
2659                                    : OMP_IDENT_WORK_SECTIONS),
2660       getThreadID(CGF, Loc)};
2661   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2662   if (isOpenMPDistributeDirective(DKind) &&
2663       CGM.getLangOpts().OpenMPIsTargetDevice &&
2664       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2665     CGF.EmitRuntimeCall(
2666         OMPBuilder.getOrCreateRuntimeFunction(
2667             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2668         Args);
2669   else
2670     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2671                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2672                         Args);
2673 }
2674 
2675 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2676                                                  SourceLocation Loc,
2677                                                  unsigned IVSize,
2678                                                  bool IVSigned) {
2679   if (!CGF.HaveInsertPoint())
2680     return;
2681   // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2682   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2683   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2684                       Args);
2685 }
2686 
2687 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2688                                           SourceLocation Loc, unsigned IVSize,
2689                                           bool IVSigned, Address IL,
2690                                           Address LB, Address UB,
2691                                           Address ST) {
2692   // Call __kmpc_dispatch_next(
2693   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2694   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2695   //          kmp_int[32|64] *p_stride);
2696   llvm::Value *Args[] = {
2697       emitUpdateLocation(CGF, Loc),
2698       getThreadID(CGF, Loc),
2699       IL.getPointer(), // &isLastIter
2700       LB.getPointer(), // &Lower
2701       UB.getPointer(), // &Upper
2702       ST.getPointer()  // &Stride
2703   };
2704   llvm::Value *Call = CGF.EmitRuntimeCall(
2705       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2706   return CGF.EmitScalarConversion(
2707       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2708       CGF.getContext().BoolTy, Loc);
2709 }
2710 
2711 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2712                                            llvm::Value *NumThreads,
2713                                            SourceLocation Loc) {
2714   if (!CGF.HaveInsertPoint())
2715     return;
2716   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2717   llvm::Value *Args[] = {
2718       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2719       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2720   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2721                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2722                       Args);
2723 }
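
// E.g., '#pragma omp parallel num_threads(8)' emits, before the fork call
// (a sketch): __kmpc_push_num_threads(&loc, tid, 8);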
2724 
2725 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2726                                          ProcBindKind ProcBind,
2727                                          SourceLocation Loc) {
2728   if (!CGF.HaveInsertPoint())
2729     return;
2730   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2731   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2732   llvm::Value *Args[] = {
2733       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2734       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2735   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2736                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2737                       Args);
2738 }
2739 
2740 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2741                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2742   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2743     OMPBuilder.createFlush(CGF.Builder);
2744   } else {
2745     if (!CGF.HaveInsertPoint())
2746       return;
2747     // Build call void __kmpc_flush(ident_t *loc)
2748     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2749                             CGM.getModule(), OMPRTL___kmpc_flush),
2750                         emitUpdateLocation(CGF, Loc));
2751   }
2752 }
2753 
2754 namespace {
2755 /// Indexes of fields for type kmp_task_t.
2756 enum KmpTaskTFields {
2757   /// List of shared variables.
2758   KmpTaskTShareds,
2759   /// Task routine.
2760   KmpTaskTRoutine,
2761   /// Partition id for the untied tasks.
2762   KmpTaskTPartId,
2763   /// Function with call of destructors for private variables.
2764   Data1,
2765   /// Task priority.
2766   Data2,
2767   /// (Taskloops only) Lower bound.
2768   KmpTaskTLowerBound,
2769   /// (Taskloops only) Upper bound.
2770   KmpTaskTUpperBound,
2771   /// (Taskloops only) Stride.
2772   KmpTaskTStride,
2773   /// (Taskloops only) Is last iteration flag.
2774   KmpTaskTLastIter,
2775   /// (Taskloops only) Reduction data.
2776   KmpTaskTReductions,
2777 };
2778 } // anonymous namespace
2779 
2780 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2781   // If we are in simd mode or there are no entries, we don't need to do
2782   // anything.
2783   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2784     return;
2785 
2786   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2787       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2788              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2789     SourceLocation Loc;
2790     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2791       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2792                 E = CGM.getContext().getSourceManager().fileinfo_end();
2793            I != E; ++I) {
2794         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2795             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2796           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2797               I->getFirst(), EntryInfo.Line, 1);
2798           break;
2799         }
2800       }
2801     }
2802     switch (Kind) {
2803     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2804       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2805           DiagnosticsEngine::Error, "Offloading entry for target region in "
2806                                     "%0 is incorrect: either the "
2807                                     "address or the ID is invalid.");
2808       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2809     } break;
2810     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2811       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2812           DiagnosticsEngine::Error, "Offloading entry for declare target "
2813                                     "variable %0 is incorrect: the "
2814                                     "address is invalid.");
2815       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2816     } break;
2817     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2818       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2819           DiagnosticsEngine::Error,
2820           "Offloading entry for declare target variable is incorrect: the "
2821           "address is invalid.");
2822       CGM.getDiags().Report(DiagID);
2823     } break;
2824     }
2825   };
2826 
2827   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2828 }
2829 
2830 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2831   if (!KmpRoutineEntryPtrTy) {
2832     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2833     ASTContext &C = CGM.getContext();
2834     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2835     FunctionProtoType::ExtProtoInfo EPI;
2836     KmpRoutineEntryPtrQTy = C.getPointerType(
2837         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2838     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2839   }
2840 }
2841 
2842 namespace {
2843 struct PrivateHelpersTy {
2844   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2845                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2846       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2847         PrivateElemInit(PrivateElemInit) {}
2848   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2849   const Expr *OriginalRef = nullptr;
2850   const VarDecl *Original = nullptr;
2851   const VarDecl *PrivateCopy = nullptr;
2852   const VarDecl *PrivateElemInit = nullptr;
2853   bool isLocalPrivate() const {
2854     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2855   }
2856 };
2857 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2858 } // anonymous namespace
2859 
2860 static bool isAllocatableDecl(const VarDecl *VD) {
2861   const VarDecl *CVD = VD->getCanonicalDecl();
2862   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2863     return false;
2864   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2865   // Use the default allocation only if the default allocator is used
2866   // without an allocator expression; such variables are not allocatable.
2866   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2867            !AA->getAllocator());
2868 }
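
// E.g., a variable covered by '#pragma omp allocate(v)
// allocator(omp_high_bw_mem_alloc)' is allocatable here, whereas a plain
// declaration, or one using the default allocator with no allocator
// expression, is not.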
2869 
2870 static RecordDecl *
2871 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2872   if (!Privates.empty()) {
2873     ASTContext &C = CGM.getContext();
2874     // Build struct .kmp_privates.t {
2875     //         /*  private vars  */
2876     //       };
2877     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2878     RD->startDefinition();
2879     for (const auto &Pair : Privates) {
2880       const VarDecl *VD = Pair.second.Original;
2881       QualType Type = VD->getType().getNonReferenceType();
2882       // If the private variable is a local variable with lvalue ref type,
2883       // allocate the pointer instead of the pointee type.
2884       if (Pair.second.isLocalPrivate()) {
2885         if (VD->getType()->isLValueReferenceType())
2886           Type = C.getPointerType(Type);
2887         if (isAllocatableDecl(VD))
2888           Type = C.getPointerType(Type);
2889       }
2890       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2891       if (VD->hasAttrs()) {
2892         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2893              E(VD->getAttrs().end());
2894              I != E; ++I)
2895           FD->addAttr(*I);
2896       }
2897     }
2898     RD->completeDefinition();
2899     return RD;
2900   }
2901   return nullptr;
2902 }
2903 
2904 static RecordDecl *
2905 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2906                          QualType KmpInt32Ty,
2907                          QualType KmpRoutineEntryPointerQTy) {
2908   ASTContext &C = CGM.getContext();
2909   // Build struct kmp_task_t {
2910   //         void *              shareds;
2911   //         kmp_routine_entry_t routine;
2912   //         kmp_int32           part_id;
2913   //         kmp_cmplrdata_t data1;
2914   //         kmp_cmplrdata_t data2;
2915   // For taskloops additional fields:
2916   //         kmp_uint64          lb;
2917   //         kmp_uint64          ub;
2918   //         kmp_int64           st;
2919   //         kmp_int32           liter;
2920   //         void *              reductions;
2921   //       };
2922   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2923   UD->startDefinition();
2924   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2925   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2926   UD->completeDefinition();
2927   QualType KmpCmplrdataTy = C.getRecordType(UD);
2928   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2929   RD->startDefinition();
2930   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2931   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2932   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2933   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2934   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2935   if (isOpenMPTaskLoopDirective(Kind)) {
2936     QualType KmpUInt64Ty =
2937         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2938     QualType KmpInt64Ty =
2939         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2940     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2941     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2942     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2943     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2944     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2945   }
2946   RD->completeDefinition();
2947   return RD;
2948 }
2949 
2950 static RecordDecl *
2951 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2952                                      ArrayRef<PrivateDataTy> Privates) {
2953   ASTContext &C = CGM.getContext();
2954   // Build struct kmp_task_t_with_privates {
2955   //         kmp_task_t task_data;
2956   //         .kmp_privates_t. privates;
2957   //       };
2958   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2959   RD->startDefinition();
2960   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2961   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2962     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2963   RD->completeDefinition();
2964   return RD;
2965 }
2966 
2967 /// Emit a proxy function which accepts kmp_task_t as the second
2968 /// argument.
2969 /// \code
2970 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2971 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2972 ///   For taskloops:
2973 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2974 ///   tt->reductions, tt->shareds);
2975 ///   return 0;
2976 /// }
2977 /// \endcode
2978 static llvm::Function *
2979 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2980                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2981                       QualType KmpTaskTWithPrivatesPtrQTy,
2982                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2983                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
2984                       llvm::Value *TaskPrivatesMap) {
2985   ASTContext &C = CGM.getContext();
2986   FunctionArgList Args;
2987   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2988                             ImplicitParamKind::Other);
2989   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2990                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2991                                 ImplicitParamKind::Other);
2992   Args.push_back(&GtidArg);
2993   Args.push_back(&TaskTypeArg);
2994   const auto &TaskEntryFnInfo =
2995       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2996   llvm::FunctionType *TaskEntryTy =
2997       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2998   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
2999   auto *TaskEntry = llvm::Function::Create(
3000       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3001   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3002   TaskEntry->setDoesNotRecurse();
3003   CodeGenFunction CGF(CGM);
3004   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3005                     Loc, Loc);
3006 
3007   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3008   // tt,
3009   // For taskloops:
3010   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3011   // tt->task_data.shareds);
3012   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3013       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3014   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3015       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3016       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3017   const auto *KmpTaskTWithPrivatesQTyRD =
3018       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3019   LValue Base =
3020       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3021   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3022   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3023   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3024   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3025 
3026   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3027   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3028   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3029       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3030       CGF.ConvertTypeForMem(SharedsPtrTy));
3031 
3032   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3033   llvm::Value *PrivatesParam;
3034   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3035     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3036     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3037         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3038   } else {
3039     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3040   }
3041 
3042   llvm::Value *CommonArgs[] = {
3043       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3044       CGF.Builder
3045           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3046                                                CGF.VoidPtrTy, CGF.Int8Ty)
3047           .getPointer()};
3048   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3049                                           std::end(CommonArgs));
3050   if (isOpenMPTaskLoopDirective(Kind)) {
3051     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3052     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3053     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3054     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3055     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3056     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3057     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3058     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3059     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3060     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3061     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3062     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3063     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3064     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3065     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3066     CallArgs.push_back(LBParam);
3067     CallArgs.push_back(UBParam);
3068     CallArgs.push_back(StParam);
3069     CallArgs.push_back(LIParam);
3070     CallArgs.push_back(RParam);
3071   }
3072   CallArgs.push_back(SharedsParam);
3073 
3074   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3075                                                   CallArgs);
3076   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3077                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3078   CGF.FinishFunction();
3079   return TaskEntry;
3080 }
3081 
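/// Emit a function that runs the destructors for the destructible private
/// fields of a task. A sketch of the generated entry (names illustrative):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   ~destruct each destructible field of tt->privates;
/// }
/// \endcode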
3082 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3083                                             SourceLocation Loc,
3084                                             QualType KmpInt32Ty,
3085                                             QualType KmpTaskTWithPrivatesPtrQTy,
3086                                             QualType KmpTaskTWithPrivatesQTy) {
3087   ASTContext &C = CGM.getContext();
3088   FunctionArgList Args;
3089   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3090                             ImplicitParamKind::Other);
3091   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3092                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3093                                 ImplicitParamKind::Other);
3094   Args.push_back(&GtidArg);
3095   Args.push_back(&TaskTypeArg);
3096   const auto &DestructorFnInfo =
3097       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3098   llvm::FunctionType *DestructorFnTy =
3099       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3100   std::string Name =
3101       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3102   auto *DestructorFn =
3103       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3104                              Name, &CGM.getModule());
3105   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3106                                     DestructorFnInfo);
3107   DestructorFn->setDoesNotRecurse();
3108   CodeGenFunction CGF(CGM);
3109   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3110                     Args, Loc, Loc);
3111 
3112   LValue Base = CGF.EmitLoadOfPointerLValue(
3113       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3114       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3115   const auto *KmpTaskTWithPrivatesQTyRD =
3116       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3117   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3118   Base = CGF.EmitLValueForField(Base, *FI);
3119   for (const auto *Field :
3120        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3121     if (QualType::DestructionKind DtorKind =
3122             Field->getType().isDestructedType()) {
3123       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3124       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3125     }
3126   }
3127   CGF.FinishFunction();
3128   return DestructorFn;
3129 }
3130 
3131 /// Emit a privates mapping function for correct handling of private and
3132 /// firstprivate variables.
3133 /// \code
3134 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3135 /// **noalias priv1,...,  <tyn> **noalias privn) {
3136 ///   *priv1 = &.privates.priv1;
3137 ///   ...;
3138 ///   *privn = &.privates.privn;
3139 /// }
3140 /// \endcode
3141 static llvm::Value *
3142 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3143                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3144                                ArrayRef<PrivateDataTy> Privates) {
3145   ASTContext &C = CGM.getContext();
3146   FunctionArgList Args;
3147   ImplicitParamDecl TaskPrivatesArg(
3148       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3149       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3150       ImplicitParamKind::Other);
3151   Args.push_back(&TaskPrivatesArg);
3152   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3153   unsigned Counter = 1;
3154   for (const Expr *E : Data.PrivateVars) {
3155     Args.push_back(ImplicitParamDecl::Create(
3156         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3157         C.getPointerType(C.getPointerType(E->getType()))
3158             .withConst()
3159             .withRestrict(),
3160         ImplicitParamKind::Other));
3161     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3162     PrivateVarsPos[VD] = Counter;
3163     ++Counter;
3164   }
3165   for (const Expr *E : Data.FirstprivateVars) {
3166     Args.push_back(ImplicitParamDecl::Create(
3167         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3168         C.getPointerType(C.getPointerType(E->getType()))
3169             .withConst()
3170             .withRestrict(),
3171         ImplicitParamKind::Other));
3172     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3173     PrivateVarsPos[VD] = Counter;
3174     ++Counter;
3175   }
3176   for (const Expr *E : Data.LastprivateVars) {
3177     Args.push_back(ImplicitParamDecl::Create(
3178         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3179         C.getPointerType(C.getPointerType(E->getType()))
3180             .withConst()
3181             .withRestrict(),
3182         ImplicitParamKind::Other));
3183     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3184     PrivateVarsPos[VD] = Counter;
3185     ++Counter;
3186   }
3187   for (const VarDecl *VD : Data.PrivateLocals) {
3188     QualType Ty = VD->getType().getNonReferenceType();
3189     if (VD->getType()->isLValueReferenceType())
3190       Ty = C.getPointerType(Ty);
3191     if (isAllocatableDecl(VD))
3192       Ty = C.getPointerType(Ty);
3193     Args.push_back(ImplicitParamDecl::Create(
3194         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3195         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3196         ImplicitParamKind::Other));
3197     PrivateVarsPos[VD] = Counter;
3198     ++Counter;
3199   }
3200   const auto &TaskPrivatesMapFnInfo =
3201       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3202   llvm::FunctionType *TaskPrivatesMapTy =
3203       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3204   std::string Name =
3205       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3206   auto *TaskPrivatesMap = llvm::Function::Create(
3207       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3208       &CGM.getModule());
3209   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3210                                     TaskPrivatesMapFnInfo);
3211   if (CGM.getLangOpts().Optimize) {
3212     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3213     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3214     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3215   }
3216   CodeGenFunction CGF(CGM);
3217   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3218                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3219 
3220   // *privi = &.privates.privi;
3221   LValue Base = CGF.EmitLoadOfPointerLValue(
3222       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3223       TaskPrivatesArg.getType()->castAs<PointerType>());
3224   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3225   Counter = 0;
3226   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3227     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3228     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3229     LValue RefLVal =
3230         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3231     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3232         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3233     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3234     ++Counter;
3235   }
3236   CGF.FinishFunction();
3237   return TaskPrivatesMap;
3238 }
3239 
3240 /// Emit initialization for private variables in task-based directives.
3241 static void emitPrivatesInit(CodeGenFunction &CGF,
3242                              const OMPExecutableDirective &D,
3243                              Address KmpTaskSharedsPtr, LValue TDBase,
3244                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3245                              QualType SharedsTy, QualType SharedsPtrTy,
3246                              const OMPTaskDataTy &Data,
3247                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3248   ASTContext &C = CGF.getContext();
3249   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3250   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3251   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3252                                  ? OMPD_taskloop
3253                                  : OMPD_task;
3254   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3255   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3256   LValue SrcBase;
3257   bool IsTargetTask =
3258       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3259       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3260   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3261   // PointersArray, SizesArray, and MappersArray. The original variables for
3262   // these arrays are not captured and we get their addresses explicitly.
3263   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3264       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3265     SrcBase = CGF.MakeAddrLValue(
3266         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3267             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3268             CGF.ConvertTypeForMem(SharedsTy)),
3269         SharedsTy);
3270   }
3271   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3272   for (const PrivateDataTy &Pair : Privates) {
3273     // Do not initialize private locals.
3274     if (Pair.second.isLocalPrivate()) {
3275       ++FI;
3276       continue;
3277     }
3278     const VarDecl *VD = Pair.second.PrivateCopy;
3279     const Expr *Init = VD->getAnyInitializer();
3280     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3281                              !CGF.isTrivialInitializer(Init)))) {
3282       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3283       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3284         const VarDecl *OriginalVD = Pair.second.Original;
3285         // Check if the variable is the target-based BasePointersArray,
3286         // PointersArray, SizesArray, or MappersArray.
3287         LValue SharedRefLValue;
3288         QualType Type = PrivateLValue.getType();
3289         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3290         if (IsTargetTask && !SharedField) {
3291           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3292                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3293                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3294                          ->getNumParams() == 0 &&
3295                  isa<TranslationUnitDecl>(
3296                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3297                          ->getDeclContext()) &&
3298                  "Expected artificial target data variable.");
3299           SharedRefLValue =
3300               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3301         } else if (ForDup) {
3302           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3303           SharedRefLValue = CGF.MakeAddrLValue(
3304               SharedRefLValue.getAddress(CGF).withAlignment(
3305                   C.getDeclAlign(OriginalVD)),
3306               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3307               SharedRefLValue.getTBAAInfo());
3308         } else if (CGF.LambdaCaptureFields.count(
3309                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3310                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3311           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3312         } else {
3313           // Processing for implicitly captured variables.
3314           InlinedOpenMPRegionRAII Region(
3315               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3316               /*HasCancel=*/false, /*NoInheritance=*/true);
3317           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3318         }
3319         if (Type->isArrayType()) {
3320           // Initialize firstprivate array.
3321           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3322             // Perform simple memcpy.
3323             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3324           } else {
3325             // Initialize firstprivate array using element-by-element
3326             // initialization.
3327             CGF.EmitOMPAggregateAssign(
3328                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3329                 Type,
3330                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3331                                                   Address SrcElement) {
3332                   // Clean up any temporaries needed by the initialization.
3333                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3334                   InitScope.addPrivate(Elem, SrcElement);
3335                   (void)InitScope.Privatize();
3336                   // Emit initialization for single element.
3337                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3338                       CGF, &CapturesInfo);
3339                   CGF.EmitAnyExprToMem(Init, DestElement,
3340                                        Init->getType().getQualifiers(),
3341                                        /*IsInitializer=*/false);
3342                 });
3343           }
3344         } else {
3345           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3346           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3347           (void)InitScope.Privatize();
3348           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3349           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3350                              /*capturedByInit=*/false);
3351         }
3352       } else {
3353         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3354       }
3355     }
3356     ++FI;
3357   }
3358 }
3359 
3360 /// Check if initialization of private copies is required for taskloops.
3361 static bool checkInitIsRequired(CodeGenFunction &CGF,
3362                                 ArrayRef<PrivateDataTy> Privates) {
3363   bool InitRequired = false;
3364   for (const PrivateDataTy &Pair : Privates) {
3365     if (Pair.second.isLocalPrivate())
3366       continue;
3367     const VarDecl *VD = Pair.second.PrivateCopy;
3368     const Expr *Init = VD->getAnyInitializer();
3369     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3370                                     !CGF.isTrivialInitializer(Init));
3371     if (InitRequired)
3372       break;
3373   }
3374   return InitRequired;
3375 }
3376 
3378 /// Emit task_dup function (for initialization of
3379 /// private/firstprivate/lastprivate vars and last_iter flag)
3380 /// \code
3381 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3382 /// lastpriv) {
3383 /// // setup lastprivate flag
3384 ///    task_dst->last = lastpriv;
3385 /// // could be constructor calls here...
3386 /// }
3387 /// \endcode
3388 static llvm::Value *
3389 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3390                     const OMPExecutableDirective &D,
3391                     QualType KmpTaskTWithPrivatesPtrQTy,
3392                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3393                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3394                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3395                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3396   ASTContext &C = CGM.getContext();
3397   FunctionArgList Args;
3398   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3399                            KmpTaskTWithPrivatesPtrQTy,
3400                            ImplicitParamKind::Other);
3401   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3402                            KmpTaskTWithPrivatesPtrQTy,
3403                            ImplicitParamKind::Other);
3404   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3405                                 ImplicitParamKind::Other);
3406   Args.push_back(&DstArg);
3407   Args.push_back(&SrcArg);
3408   Args.push_back(&LastprivArg);
3409   const auto &TaskDupFnInfo =
3410       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3411   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3412   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3413   auto *TaskDup = llvm::Function::Create(
3414       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3415   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3416   TaskDup->setDoesNotRecurse();
3417   CodeGenFunction CGF(CGM);
3418   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3419                     Loc);
3420 
3421   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3422       CGF.GetAddrOfLocalVar(&DstArg),
3423       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3424   // task_dst->liter = lastpriv;
3425   if (WithLastIter) {
3426     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3427     LValue Base = CGF.EmitLValueForField(
3428         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3429     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3430     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3431         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3432     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3433   }
3434 
3435   // Emit initial values for private copies (if any).
3436   assert(!Privates.empty());
3437   Address KmpTaskSharedsPtr = Address::invalid();
3438   if (!Data.FirstprivateVars.empty()) {
3439     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3440         CGF.GetAddrOfLocalVar(&SrcArg),
3441         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3442     LValue Base = CGF.EmitLValueForField(
3443         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3444     KmpTaskSharedsPtr = Address(
3445         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3446                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3447                                                   KmpTaskTShareds)),
3448                              Loc),
3449         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3450   }
3451   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3452                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3453   CGF.FinishFunction();
3454   return TaskDup;
3455 }
3456 
3457 /// Checks if destructor function is required to be generated.
3458 /// \return true if cleanups are required, false otherwise.
3459 static bool
3460 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3461                          ArrayRef<PrivateDataTy> Privates) {
3462   for (const PrivateDataTy &P : Privates) {
3463     if (P.second.isLocalPrivate())
3464       continue;
3465     QualType Ty = P.second.Original->getType().getNonReferenceType();
3466     if (Ty.isDestructedType())
3467       return true;
3468   }
3469   return false;
3470 }
3471 
3472 namespace {
3473 /// Loop generator for OpenMP iterator expression.
3474 class OMPIteratorGeneratorScope final
3475     : public CodeGenFunction::OMPPrivateScope {
3476   CodeGenFunction &CGF;
3477   const OMPIteratorExpr *E = nullptr;
3478   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3479   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3480   OMPIteratorGeneratorScope() = delete;
3481   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3482 
3483 public:
3484   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3485       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3486     if (!E)
3487       return;
3488     SmallVector<llvm::Value *, 4> Uppers;
3489     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3490       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3491       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3492       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3493       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3494       addPrivate(
3495           HelperData.CounterVD,
3496           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3497     }
3498     Privatize();
3499 
3500     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3501       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3502       LValue CLVal =
3503           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3504                              HelperData.CounterVD->getType());
3505       // Counter = 0;
3506       CGF.EmitStoreOfScalar(
3507           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3508           CLVal);
3509       CodeGenFunction::JumpDest &ContDest =
3510           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3511       CodeGenFunction::JumpDest &ExitDest =
3512           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3513       // N = <number-of-iterations>;
3514       llvm::Value *N = Uppers[I];
3515       // cont:
3516       // if (Counter < N) goto body; else goto exit;
3517       CGF.EmitBlock(ContDest.getBlock());
3518       auto *CVal =
3519           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3520       llvm::Value *Cmp =
3521           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3522               ? CGF.Builder.CreateICmpSLT(CVal, N)
3523               : CGF.Builder.CreateICmpULT(CVal, N);
3524       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3525       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3526       // body:
3527       CGF.EmitBlock(BodyBB);
3528       // Iteri = Begini + Counter * Stepi;
3529       CGF.EmitIgnoredExpr(HelperData.Update);
3530     }
3531   }
3532   ~OMPIteratorGeneratorScope() {
3533     if (!E)
3534       return;
3535     for (unsigned I = E->numOfIterators(); I > 0; --I) {
3536       // Counter = Counter + 1;
3537       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3538       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3539       // goto cont;
3540       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3541       // exit:
3542       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3543     }
3544   }
3545 };
3546 } // namespace
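
// For a clause such as 'depend(iterator(it = 0 : n), in : a[it])' this scope
// brackets the dependence emission with, in effect (a sketch):
//   for (counter = 0; counter < n; ++counter) {
//     it = begin + counter * step;
//     <code emitted between construction and destruction of the scope>;
//   }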
3547 
3548 static std::pair<llvm::Value *, llvm::Value *>
3549 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3550   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3551   llvm::Value *Addr;
3552   if (OASE) {
3553     const Expr *Base = OASE->getBase();
3554     Addr = CGF.EmitScalarExpr(Base);
3555   } else {
3556     Addr = CGF.EmitLValue(E).getPointer(CGF);
3557   }
3558   llvm::Value *SizeVal;
3559   QualType Ty = E->getType();
3560   if (OASE) {
3561     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3562     for (const Expr *SE : OASE->getDimensions()) {
3563       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3564       Sz = CGF.EmitScalarConversion(
3565           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3566       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3567     }
3568   } else if (const auto *ASE =
3569                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3570     LValue UpAddrLVal =
3571         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3572     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3573     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3574         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3575     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3576     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3577     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3578   } else {
3579     SizeVal = CGF.getTypeSize(Ty);
3580   }
3581   return std::make_pair(Addr, SizeVal);
3582 }
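
// E.g., for an array-shaping expression '([n][m])p' the size is
// sizeof(*p) * n * m, while for an array section 'a[lb : len]' it is the
// byte distance from &a[lb] to one past the last element of the section.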
3583 
3584 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3585 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3586   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3587   if (KmpTaskAffinityInfoTy.isNull()) {
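    // Build struct kmp_task_affinity_info_t {
    //         intptr_t base_addr;
    //         size_t   len;
    //         uint32_t flags;
    //       };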
3588     RecordDecl *KmpAffinityInfoRD =
3589         C.buildImplicitRecord("kmp_task_affinity_info_t");
3590     KmpAffinityInfoRD->startDefinition();
3591     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3592     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3593     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3594     KmpAffinityInfoRD->completeDefinition();
3595     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3596   }
3597 }
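// For reference, the implicit record built above mirrors the runtime's C-level
// layout (a sketch; kmp.h in the OpenMP runtime is authoritative, and packs
// the flags into a bitfield struct):
//   typedef struct kmp_task_affinity_info {
//     intptr_t base_addr;  // start address of the affinity range
//     size_t len;          // length of the range in bytes
//     kmp_uint32 flags;    // 32-bit flags word
//   } kmp_task_affinity_info_t;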
3598 
3599 CGOpenMPRuntime::TaskResultTy
3600 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3601                               const OMPExecutableDirective &D,
3602                               llvm::Function *TaskFunction, QualType SharedsTy,
3603                               Address Shareds, const OMPTaskDataTy &Data) {
3604   ASTContext &C = CGM.getContext();
3605   llvm::SmallVector<PrivateDataTy, 4> Privates;
3606   // Aggregate privates and sort them in descending order of alignment.
3607   const auto *I = Data.PrivateCopies.begin();
3608   for (const Expr *E : Data.PrivateVars) {
3609     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3610     Privates.emplace_back(
3611         C.getDeclAlign(VD),
3612         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3613                          /*PrivateElemInit=*/nullptr));
3614     ++I;
3615   }
3616   I = Data.FirstprivateCopies.begin();
3617   const auto *IElemInitRef = Data.FirstprivateInits.begin();
3618   for (const Expr *E : Data.FirstprivateVars) {
3619     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3620     Privates.emplace_back(
3621         C.getDeclAlign(VD),
3622         PrivateHelpersTy(
3623             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3624             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3625     ++I;
3626     ++IElemInitRef;
3627   }
3628   I = Data.LastprivateCopies.begin();
3629   for (const Expr *E : Data.LastprivateVars) {
3630     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3631     Privates.emplace_back(
3632         C.getDeclAlign(VD),
3633         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3634                          /*PrivateElemInit=*/nullptr));
3635     ++I;
3636   }
3637   for (const VarDecl *VD : Data.PrivateLocals) {
3638     if (isAllocatableDecl(VD))
3639       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3640     else
3641       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3642   }
3643   llvm::stable_sort(Privates,
3644                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
3645                       return L.first > R.first;
3646                     });
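  // E.g. privates with alignments {4, 16, 8} are laid out as {16, 8, 4}, so
  // each field of the generated privates record can be placed at a naturally
  // aligned offset without padding.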
3647   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3648   // Build type kmp_routine_entry_t (if not built yet).
3649   emitKmpRoutineEntryT(KmpInt32Ty);
3650   // Build type kmp_task_t (if not built yet).
3651   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3652     if (SavedKmpTaskloopTQTy.isNull()) {
3653       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3654           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3655     }
3656     KmpTaskTQTy = SavedKmpTaskloopTQTy;
3657   } else {
3658     assert((D.getDirectiveKind() == OMPD_task ||
3659             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3660             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3661            "Expected taskloop, task or target directive");
3662     if (SavedKmpTaskTQTy.isNull()) {
3663       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3664           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3665     }
3666     KmpTaskTQTy = SavedKmpTaskTQTy;
3667   }
3668   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3669   // Build particular struct kmp_task_t for the given task.
3670   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3671       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3672   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3673   QualType KmpTaskTWithPrivatesPtrQTy =
3674       C.getPointerType(KmpTaskTWithPrivatesQTy);
3675   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3676   llvm::Type *KmpTaskTWithPrivatesPtrTy =
3677       KmpTaskTWithPrivatesTy->getPointerTo();
3678   llvm::Value *KmpTaskTWithPrivatesTySize =
3679       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3680   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3681 
3682   // Build the mapping function for the task privates (if any).
3683   llvm::Value *TaskPrivatesMap = nullptr;
3684   llvm::Type *TaskPrivatesMapTy =
3685       std::next(TaskFunction->arg_begin(), 3)->getType();
3686   if (!Privates.empty()) {
3687     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3688     TaskPrivatesMap =
3689         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3690     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3691         TaskPrivatesMap, TaskPrivatesMapTy);
3692   } else {
3693     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3694         cast<llvm::PointerType>(TaskPrivatesMapTy));
3695   }
3696   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3697   // kmp_task_t *tt);
3698   llvm::Function *TaskEntry = emitProxyTaskFunction(
3699       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3700       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3701       TaskPrivatesMap);
3702 
3703   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3704   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3705   // kmp_routine_entry_t *task_entry);
3706   // Task flags. Format is taken from
3707   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3708   // description of kmp_tasking_flags struct.
3709   enum {
3710     TiedFlag = 0x1,
3711     FinalFlag = 0x2,
3712     DestructorsFlag = 0x8,
3713     PriorityFlag = 0x20,
3714     DetachableFlag = 0x40,
3715   };
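  // For example, a tied task carrying a priority clause and privates that
  // need destructors is allocated with
  //   TiedFlag | PriorityFlag | DestructorsFlag == 0x1 | 0x20 | 0x8 == 0x29,
  // which is exactly how Flags is composed below.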
3716   unsigned Flags = Data.Tied ? TiedFlag : 0;
3717   bool NeedsCleanup = false;
3718   if (!Privates.empty()) {
3719     NeedsCleanup =
3720         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3721     if (NeedsCleanup)
3722       Flags = Flags | DestructorsFlag;
3723   }
3724   if (Data.Priority.getInt())
3725     Flags = Flags | PriorityFlag;
3726   if (D.hasClausesOfKind<OMPDetachClause>())
3727     Flags = Flags | DetachableFlag;
3728   llvm::Value *TaskFlags =
3729       Data.Final.getPointer()
3730           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3731                                      CGF.Builder.getInt32(FinalFlag),
3732                                      CGF.Builder.getInt32(/*C=*/0))
3733           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3734   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3735   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3736   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3737       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3738       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3739           TaskEntry, KmpRoutineEntryPtrTy)};
3740   llvm::Value *NewTask;
3741   if (D.hasClausesOfKind<OMPNowaitClause>()) {
3742     // Check if we have any device clause associated with the directive.
3743     const Expr *Device = nullptr;
3744     if (auto *C = D.getSingleClause<OMPDeviceClause>())
3745       Device = C->getDevice();
3746     // Emit the device ID if present; otherwise use the default value.
3747     llvm::Value *DeviceID;
3748     if (Device)
3749       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3750                                            CGF.Int64Ty, /*isSigned=*/true);
3751     else
3752       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3753     AllocArgs.push_back(DeviceID);
3754     NewTask = CGF.EmitRuntimeCall(
3755         OMPBuilder.getOrCreateRuntimeFunction(
3756             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3757         AllocArgs);
3758   } else {
3759     NewTask =
3760         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3761                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3762                             AllocArgs);
3763   }
3764   // Emit detach clause initialization.
3765   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3766   // task_descriptor);
3767   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3768     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3769     LValue EvtLVal = CGF.EmitLValue(Evt);
3770 
3771     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3772     // int gtid, kmp_task_t *task);
3773     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3774     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3775     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3776     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3777         OMPBuilder.getOrCreateRuntimeFunction(
3778             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3779         {Loc, Tid, NewTask});
3780     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3781                                       Evt->getExprLoc());
3782     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3783   }
3784   // Process affinity clauses.
3785   if (D.hasClausesOfKind<OMPAffinityClause>()) {
3786     // Process list of affinity data.
3787     ASTContext &C = CGM.getContext();
3788     Address AffinitiesArray = Address::invalid();
3789     // Calculate number of elements to form the array of affinity data.
3790     llvm::Value *NumOfElements = nullptr;
3791     unsigned NumAffinities = 0;
3792     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3793       if (const Expr *Modifier = C->getModifier()) {
3794         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3795         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3796           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3797           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3798           NumOfElements =
3799               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3800         }
3801       } else {
3802         NumAffinities += C->varlist_size();
3803       }
3804     }
3805     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3806     // Field ids in the kmp_task_affinity_info record.
3807     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3808 
3809     QualType KmpTaskAffinityInfoArrayTy;
3810     if (NumOfElements) {
3811       NumOfElements = CGF.Builder.CreateNUWAdd(
3812           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3813       auto *OVE = new (C) OpaqueValueExpr(
3814           Loc,
3815           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3816           VK_PRValue);
3817       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3818                                                     RValue::get(NumOfElements));
3819       KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3820           KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3821           /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3822       // Properly emit variable-sized array.
3823       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3824                                            ImplicitParamKind::Other);
3825       CGF.EmitVarDecl(*PD);
3826       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3827       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3828                                                 /*isSigned=*/false);
3829     } else {
3830       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3831           KmpTaskAffinityInfoTy,
3832           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3833           ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3834       AffinitiesArray =
3835           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3836       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3837       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3838                                              /*isSigned=*/false);
3839     }
3840 
3841     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3842     // Fill the array with the elements that have no iterator modifier.
3843     unsigned Pos = 0;
3844     bool HasIterator = false;
3845     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3846       if (C->getModifier()) {
3847         HasIterator = true;
3848         continue;
3849       }
3850       for (const Expr *E : C->varlists()) {
3851         llvm::Value *Addr;
3852         llvm::Value *Size;
3853         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3854         LValue Base =
3855             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3856                                KmpTaskAffinityInfoTy);
3857         // affs[i].base_addr = &<Affinities[i].second>;
3858         LValue BaseAddrLVal = CGF.EmitLValueForField(
3859             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3860         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3861                               BaseAddrLVal);
3862         // affs[i].len = sizeof(<Affinities[i].second>);
3863         LValue LenLVal = CGF.EmitLValueForField(
3864             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3865         CGF.EmitStoreOfScalar(Size, LenLVal);
3866         ++Pos;
3867       }
3868     }
3869     LValue PosLVal;
3870     if (HasIterator) {
3871       PosLVal = CGF.MakeAddrLValue(
3872           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3873           C.getSizeType());
3874       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3875     }
3876     // Process elements with iterators.
3877     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3878       const Expr *Modifier = C->getModifier();
3879       if (!Modifier)
3880         continue;
3881       OMPIteratorGeneratorScope IteratorScope(
3882           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3883       for (const Expr *E : C->varlists()) {
3884         llvm::Value *Addr;
3885         llvm::Value *Size;
3886         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3887         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3888         LValue Base = CGF.MakeAddrLValue(
3889             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
3890         // affs[i].base_addr = &<Affinities[i].second>;
3891         LValue BaseAddrLVal = CGF.EmitLValueForField(
3892             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3893         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3894                               BaseAddrLVal);
3895         // affs[i].len = sizeof(<Affinities[i].second>);
3896         LValue LenLVal = CGF.EmitLValueForField(
3897             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3898         CGF.EmitStoreOfScalar(Size, LenLVal);
3899         Idx = CGF.Builder.CreateNUWAdd(
3900             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3901         CGF.EmitStoreOfScalar(Idx, PosLVal);
3902       }
3903     }
3904     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3905     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3906     // naffins, kmp_task_affinity_info_t *affin_list);
3907     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3908     llvm::Value *GTid = getThreadID(CGF, Loc);
3909     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3910         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
3911     // FIXME: Emit the call and ignore its result for now, until the runtime
3912     // function is properly implemented.
3913     (void)CGF.EmitRuntimeCall(
3914         OMPBuilder.getOrCreateRuntimeFunction(
3915             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3916         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3917   }
3918   llvm::Value *NewTaskNewTaskTTy =
3919       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3920           NewTask, KmpTaskTWithPrivatesPtrTy);
3921   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3922                                                KmpTaskTWithPrivatesQTy);
3923   LValue TDBase =
3924       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3925   // Fill the data in the resulting kmp_task_t record.
3926   // Copy shareds if there are any.
3927   Address KmpTaskSharedsPtr = Address::invalid();
3928   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3929     KmpTaskSharedsPtr = Address(
3930         CGF.EmitLoadOfScalar(
3931             CGF.EmitLValueForField(
3932                 TDBase,
3933                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3934             Loc),
3935         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3936     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3937     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3938     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3939   }
3940   // Emit initial values for private copies (if any).
3941   TaskResultTy Result;
3942   if (!Privates.empty()) {
3943     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3944                      SharedsTy, SharedsPtrTy, Data, Privates,
3945                      /*ForDup=*/false);
3946     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3947         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3948       Result.TaskDupFn = emitTaskDupFunction(
3949           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3950           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3951           /*WithLastIter=*/!Data.LastprivateVars.empty());
3952     }
3953   }
3954   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3955   enum { Priority = 0, Destructors = 1 };
3956   // Provide pointer to function with destructors for privates.
3957   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3958   const RecordDecl *KmpCmplrdataUD =
3959       (*FI)->getType()->getAsUnionType()->getDecl();
3960   if (NeedsCleanup) {
3961     llvm::Value *DestructorFn = emitDestructorsFunction(
3962         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3963         KmpTaskTWithPrivatesQTy);
3964     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3965     LValue DestructorsLV = CGF.EmitLValueForField(
3966         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3967     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3968                               DestructorFn, KmpRoutineEntryPtrTy),
3969                           DestructorsLV);
3970   }
3971   // Set priority.
3972   if (Data.Priority.getInt()) {
3973     LValue Data2LV = CGF.EmitLValueForField(
3974         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3975     LValue PriorityLV = CGF.EmitLValueForField(
3976         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3977     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3978   }
3979   Result.NewTask = NewTask;
3980   Result.TaskEntry = TaskEntry;
3981   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3982   Result.TDBase = TDBase;
3983   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3984   return Result;
3985 }
3986 
3987 /// Translates internal dependency kind into the runtime kind.
3988 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3989   RTLDependenceKindTy DepKind;
3990   switch (K) {
3991   case OMPC_DEPEND_in:
3992     DepKind = RTLDependenceKindTy::DepIn;
3993     break;
3994   // Out and InOut dependencies must use the same code.
3995   case OMPC_DEPEND_out:
3996   case OMPC_DEPEND_inout:
3997     DepKind = RTLDependenceKindTy::DepInOut;
3998     break;
3999   case OMPC_DEPEND_mutexinoutset:
4000     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4001     break;
4002   case OMPC_DEPEND_inoutset:
4003     DepKind = RTLDependenceKindTy::DepInOutSet;
4004     break;
4005   case OMPC_DEPEND_outallmemory:
4006     DepKind = RTLDependenceKindTy::DepOmpAllMem;
4007     break;
4008   case OMPC_DEPEND_source:
4009   case OMPC_DEPEND_sink:
4010   case OMPC_DEPEND_depobj:
4011   case OMPC_DEPEND_inoutallmemory:
4012   case OMPC_DEPEND_unknown:
4013     llvm_unreachable("Unknown task dependence type");
4014   }
4015   return DepKind;
4016 }
4017 
4018 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4019 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4020                            QualType &FlagsTy) {
4021   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4022   if (KmpDependInfoTy.isNull()) {
4023     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4024     KmpDependInfoRD->startDefinition();
4025     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4026     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4027     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4028     KmpDependInfoRD->completeDefinition();
4029     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4030   }
4031 }
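// For reference, the implicit record built above mirrors the runtime's C-level
// layout (a sketch; the runtime's kmp.h packs the flags into a bitfield
// union):
//   typedef struct kmp_depend_info {
//     kmp_intptr_t base_addr; // address of the dependence object
//     size_t len;             // size of the dependence object in bytes
//     kmp_uint8 flags;        // dependence kind, see RTLDependenceKindTy
//   } kmp_depend_info_t;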
4032 
4033 std::pair<llvm::Value *, LValue>
4034 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4035                                    SourceLocation Loc) {
4036   ASTContext &C = CGM.getContext();
4037   QualType FlagsTy;
4038   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4039   RecordDecl *KmpDependInfoRD =
4040       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4041   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4042   LValue Base = CGF.EmitLoadOfPointerLValue(
4043       DepobjLVal.getAddress(CGF).withElementType(
4044           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4045       KmpDependInfoPtrTy->castAs<PointerType>());
4046   Address DepObjAddr = CGF.Builder.CreateGEP(
4047       Base.getAddress(CGF),
4048       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4049   LValue NumDepsBase = CGF.MakeAddrLValue(
4050       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4051   // NumDeps = deps[-1].base_addr (the count stored in the bookkeeping slot);
4052   LValue BaseAddrLVal = CGF.EmitLValueForField(
4053       NumDepsBase,
4054       *std::next(KmpDependInfoRD->field_begin(),
4055                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4056   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4057   return std::make_pair(NumDeps, Base);
4058 }
4059 
4060 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4061                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4062                            const OMPTaskDataTy::DependData &Data,
4063                            Address DependenciesArray) {
4064   CodeGenModule &CGM = CGF.CGM;
4065   ASTContext &C = CGM.getContext();
4066   QualType FlagsTy;
4067   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4068   RecordDecl *KmpDependInfoRD =
4069       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4070   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4071 
4072   OMPIteratorGeneratorScope IteratorScope(
4073       CGF, cast_or_null<OMPIteratorExpr>(
4074                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4075                                  : nullptr));
4076   for (const Expr *E : Data.DepExprs) {
4077     llvm::Value *Addr;
4078     llvm::Value *Size;
4079 
4080     // The expression will be a nullptr in the 'omp_all_memory' case.
4081     if (E) {
4082       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4083       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4084     } else {
4085       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4086       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4087     }
4088     LValue Base;
4089     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4090       Base = CGF.MakeAddrLValue(
4091           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4092     } else {
4093       assert(E && "Expected a non-null expression");
4094       LValue &PosLVal = *Pos.get<LValue *>();
4095       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4096       Base = CGF.MakeAddrLValue(
4097           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4098     }
4099     // deps[i].base_addr = &<Dependencies[i].second>;
4100     LValue BaseAddrLVal = CGF.EmitLValueForField(
4101         Base,
4102         *std::next(KmpDependInfoRD->field_begin(),
4103                    static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4104     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4105     // deps[i].len = sizeof(<Dependencies[i].second>);
4106     LValue LenLVal = CGF.EmitLValueForField(
4107         Base, *std::next(KmpDependInfoRD->field_begin(),
4108                          static_cast<unsigned int>(RTLDependInfoFields::Len)));
4109     CGF.EmitStoreOfScalar(Size, LenLVal);
4110     // deps[i].flags = <Dependencies[i].first>;
4111     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4112     LValue FlagsLVal = CGF.EmitLValueForField(
4113         Base,
4114         *std::next(KmpDependInfoRD->field_begin(),
4115                    static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4116     CGF.EmitStoreOfScalar(
4117         llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4118         FlagsLVal);
4119     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4120       ++(*P);
4121     } else {
4122       LValue &PosLVal = *Pos.get<LValue *>();
4123       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4124       Idx = CGF.Builder.CreateNUWAdd(Idx,
4125                                      llvm::ConstantInt::get(Idx->getType(), 1));
4126       CGF.EmitStoreOfScalar(Idx, PosLVal);
4127     }
4128   }
4129 }
4130 
4131 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4132     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4133     const OMPTaskDataTy::DependData &Data) {
4134   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4135          "Expected depobj dependency kind.");
4136   SmallVector<llvm::Value *, 4> Sizes;
4137   SmallVector<LValue, 4> SizeLVals;
4138   ASTContext &C = CGF.getContext();
4139   {
4140     OMPIteratorGeneratorScope IteratorScope(
4141         CGF, cast_or_null<OMPIteratorExpr>(
4142                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4143                                    : nullptr));
4144     for (const Expr *E : Data.DepExprs) {
4145       llvm::Value *NumDeps;
4146       LValue Base;
4147       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4148       std::tie(NumDeps, Base) =
4149           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4150       LValue NumLVal = CGF.MakeAddrLValue(
4151           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4152           C.getUIntPtrType());
4153       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4154                               NumLVal.getAddress(CGF));
4155       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4156       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4157       CGF.EmitStoreOfScalar(Add, NumLVal);
4158       SizeLVals.push_back(NumLVal);
4159     }
4160   }
4161   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4162     llvm::Value *Size =
4163         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4164     Sizes.push_back(Size);
4165   }
4166   return Sizes;
4167 }
4168 
4169 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4170                                          QualType &KmpDependInfoTy,
4171                                          LValue PosLVal,
4172                                          const OMPTaskDataTy::DependData &Data,
4173                                          Address DependenciesArray) {
4174   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4175          "Expected depobj dependency kind.");
4176   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4177   {
4178     OMPIteratorGeneratorScope IteratorScope(
4179         CGF, cast_or_null<OMPIteratorExpr>(
4180                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4181                                    : nullptr));
4182     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4183       const Expr *E = Data.DepExprs[I];
4184       llvm::Value *NumDeps;
4185       LValue Base;
4186       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4187       std::tie(NumDeps, Base) =
4188           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4189 
4190       // Memcpy the dependency data.
4191       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4192           ElSize,
4193           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4194       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4195       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4196       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4197 
4198       // Advance pos by the number of copied records.
4199       // pos += numDeps;
4200       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4201       CGF.EmitStoreOfScalar(Add, PosLVal);
4202     }
4203   }
4204 }
4205 
4206 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4207     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4208     SourceLocation Loc) {
4209   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4210         return D.DepExprs.empty();
4211       }))
4212     return std::make_pair(nullptr, Address::invalid());
4213   // Process list of dependencies.
4214   ASTContext &C = CGM.getContext();
4215   Address DependenciesArray = Address::invalid();
4216   llvm::Value *NumOfElements = nullptr;
4217   unsigned NumDependencies = std::accumulate(
4218       Dependencies.begin(), Dependencies.end(), 0,
4219       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4220         return D.DepKind == OMPC_DEPEND_depobj
4221                    ? V
4222                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4223       });
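  // E.g. 'depend(in: a, b) depend(inout: c)' contributes 3 to this static
  // count, while depobj dependences and clauses with an iterator modifier
  // contribute 0 here and are counted dynamically below.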
4224   QualType FlagsTy;
4225   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4226   bool HasDepobjDeps = false;
4227   bool HasRegularWithIterators = false;
4228   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4229   llvm::Value *NumOfRegularWithIterators =
4230       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4231   // Calculate the number of depobj dependencies and of regular dependencies
4232   // with iterator modifiers.
4233   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4234     if (D.DepKind == OMPC_DEPEND_depobj) {
4235       SmallVector<llvm::Value *, 4> Sizes =
4236           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4237       for (llvm::Value *Size : Sizes) {
4238         NumOfDepobjElements =
4239             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4240       }
4241       HasDepobjDeps = true;
4242       continue;
4243     }
4244     // Include number of iterations, if any.
4245 
4246     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4247       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4248         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4249         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4250         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4251             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4252         NumOfRegularWithIterators =
4253             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4254       }
4255       HasRegularWithIterators = true;
4256       continue;
4257     }
4258   }
4259 
4260   QualType KmpDependInfoArrayTy;
4261   if (HasDepobjDeps || HasRegularWithIterators) {
4262     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4263                                            /*isSigned=*/false);
4264     if (HasDepobjDeps) {
4265       NumOfElements =
4266           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4267     }
4268     if (HasRegularWithIterators) {
4269       NumOfElements =
4270           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4271     }
4272     auto *OVE = new (C) OpaqueValueExpr(
4273         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4274         VK_PRValue);
4275     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4276                                                   RValue::get(NumOfElements));
4277     KmpDependInfoArrayTy =
4278         C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4279                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4281     // Properly emit variable-sized array.
4282     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4283                                          ImplicitParamKind::Other);
4284     CGF.EmitVarDecl(*PD);
4285     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4286     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4287                                               /*isSigned=*/false);
4288   } else {
4289     KmpDependInfoArrayTy = C.getConstantArrayType(
4290         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4291         ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4292     DependenciesArray =
4293         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4294     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4295     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4296                                            /*isSigned=*/false);
4297   }
4298   unsigned Pos = 0;
4299   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4300     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4301         Dependencies[I].IteratorExpr)
4302       continue;
4303     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4304                    DependenciesArray);
4305   }
4306   // Copy regular dependencies with iterators.
4307   LValue PosLVal = CGF.MakeAddrLValue(
4308       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4309   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4310   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4311     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4312         !Dependencies[I].IteratorExpr)
4313       continue;
4314     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4315                    DependenciesArray);
4316   }
4317   // Copy final depobj arrays without iterators.
4318   if (HasDepobjDeps) {
4319     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4320       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4321         continue;
4322       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4323                          DependenciesArray);
4324     }
4325   }
4326   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4327       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4328   return std::make_pair(NumOfElements, DependenciesArray);
4329 }
4330 
4331 Address CGOpenMPRuntime::emitDepobjDependClause(
4332     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4333     SourceLocation Loc) {
4334   if (Dependencies.DepExprs.empty())
4335     return Address::invalid();
4336   // Process list of dependencies.
4337   ASTContext &C = CGM.getContext();
4338   Address DependenciesArray = Address::invalid();
4339   unsigned NumDependencies = Dependencies.DepExprs.size();
4340   QualType FlagsTy;
4341   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4342   RecordDecl *KmpDependInfoRD =
4343       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4344 
4345   llvm::Value *Size;
4346   // Define type kmp_depend_info[<Dependencies.size() + 1>];
4347   // For depobj, reserve one extra element to store the number of elements;
4348   // this is required to handle the 'depobj(x) update(in)' construct.
4349   // kmp_depend_info deps[<Dependencies.size() + 1>];
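  // The resulting allocation is laid out as (a sketch):
  //   deps[0].base_addr = <number of dependence records>; // bookkeeping slot
  //   deps[1..N]        = the actual dependence records;
  // and the address of deps[1] is what this function returns.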
4350   llvm::Value *NumDepsVal;
4351   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4352   if (const auto *IE =
4353           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4354     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4355     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4356       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4357       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4358       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4359     }
4360     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4361                                     NumDepsVal);
4362     CharUnits SizeInBytes =
4363         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4364     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4365     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4366     NumDepsVal =
4367         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4368   } else {
4369     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4370         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4371         nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4372     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4373     Size = CGM.getSize(Sz.alignTo(Align));
4374     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4375   }
4376   // Need to allocate in dynamic memory.
4377   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4378   // Use default allocator.
4379   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4380   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4381 
4382   llvm::Value *Addr =
4383       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4384                               CGM.getModule(), OMPRTL___kmpc_alloc),
4385                           Args, ".dep.arr.addr");
4386   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4387   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4388       Addr, KmpDependInfoLlvmTy->getPointerTo());
4389   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4390   // Write the number of elements into the first element of the depobj array.
4391   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4392   // deps[i].base_addr = NumDependencies;
4393   LValue BaseAddrLVal = CGF.EmitLValueForField(
4394       Base,
4395       *std::next(KmpDependInfoRD->field_begin(),
4396                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4397   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4398   llvm::PointerUnion<unsigned *, LValue *> Pos;
4399   unsigned Idx = 1;
4400   LValue PosLVal;
4401   if (Dependencies.IteratorExpr) {
4402     PosLVal = CGF.MakeAddrLValue(
4403         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4404         C.getSizeType());
4405     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4406                           /*IsInit=*/true);
4407     Pos = &PosLVal;
4408   } else {
4409     Pos = &Idx;
4410   }
4411   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4412   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4413       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4414       CGF.Int8Ty);
4415   return DependenciesArray;
4416 }
4417 
4418 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4419                                         SourceLocation Loc) {
4420   ASTContext &C = CGM.getContext();
4421   QualType FlagsTy;
4422   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4423   LValue Base = CGF.EmitLoadOfPointerLValue(
4424       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4425   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4426   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4427       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4428       CGF.ConvertTypeForMem(KmpDependInfoTy));
4429   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4430       Addr.getElementType(), Addr.getPointer(),
4431       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4432   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4433                                                                CGF.VoidPtrTy);
4434   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4435   // Use default allocator.
4436   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4437   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4438 
4439   // __kmpc_free(gtid, addr, nullptr);
4440   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4441                                 CGM.getModule(), OMPRTL___kmpc_free),
4442                             Args);
4443 }
4444 
4445 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4446                                        OpenMPDependClauseKind NewDepKind,
4447                                        SourceLocation Loc) {
4448   ASTContext &C = CGM.getContext();
4449   QualType FlagsTy;
4450   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4451   RecordDecl *KmpDependInfoRD =
4452       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4453   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4454   llvm::Value *NumDeps;
4455   LValue Base;
4456   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4457 
4458   Address Begin = Base.getAddress(CGF);
4459   // Compute the pointer past the last element (Begin + NumDeps).
4460   llvm::Value *End = CGF.Builder.CreateGEP(
4461       Begin.getElementType(), Begin.getPointer(), NumDeps);
4462   // The basic structure here is a while-do loop.
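  // In C terms the emitted IR is roughly (a sketch):
  //   kmp_depend_info *el = begin;
  //   do {
  //     el->flags = <new dependence kind>;
  //   } while (++el != end);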
4463   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4464   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4465   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4466   CGF.EmitBlock(BodyBB);
4467   llvm::PHINode *ElementPHI =
4468       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4469   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4470   Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4471   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4472                             Base.getTBAAInfo());
4473   // deps[i].flags = NewDepKind;
4474   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4475   LValue FlagsLVal = CGF.EmitLValueForField(
4476       Base, *std::next(KmpDependInfoRD->field_begin(),
4477                        static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4478   CGF.EmitStoreOfScalar(
4479       llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4480       FlagsLVal);
4481 
4482   // Shift the address forward by one element.
4483   Address ElementNext =
4484       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4485   ElementPHI->addIncoming(ElementNext.getPointer(),
4486                           CGF.Builder.GetInsertBlock());
4487   llvm::Value *IsEmpty =
4488       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4489   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4490   // Done.
4491   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4492 }
4493 
4494 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4495                                    const OMPExecutableDirective &D,
4496                                    llvm::Function *TaskFunction,
4497                                    QualType SharedsTy, Address Shareds,
4498                                    const Expr *IfCond,
4499                                    const OMPTaskDataTy &Data) {
4500   if (!CGF.HaveInsertPoint())
4501     return;
4502 
4503   TaskResultTy Result =
4504       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4505   llvm::Value *NewTask = Result.NewTask;
4506   llvm::Function *TaskEntry = Result.TaskEntry;
4507   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4508   LValue TDBase = Result.TDBase;
4509   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4510   // Process list of dependences.
4511   Address DependenciesArray = Address::invalid();
4512   llvm::Value *NumOfElements;
4513   std::tie(NumOfElements, DependenciesArray) =
4514       emitDependClause(CGF, Data.Dependences, Loc);
4515 
4516   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4517   // libcall.
4518   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4519   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4520   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
4521   // dependence list is not empty.
4522   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4523   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4524   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4525   llvm::Value *DepTaskArgs[7];
4526   if (!Data.Dependences.empty()) {
4527     DepTaskArgs[0] = UpLoc;
4528     DepTaskArgs[1] = ThreadID;
4529     DepTaskArgs[2] = NewTask;
4530     DepTaskArgs[3] = NumOfElements;
4531     DepTaskArgs[4] = DependenciesArray.getPointer();
4532     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4533     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4534   }
4535   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4536                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4537     if (!Data.Tied) {
4538       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4539       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4540       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4541     }
4542     if (!Data.Dependences.empty()) {
4543       CGF.EmitRuntimeCall(
4544           OMPBuilder.getOrCreateRuntimeFunction(
4545               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4546           DepTaskArgs);
4547     } else {
4548       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4549                               CGM.getModule(), OMPRTL___kmpc_omp_task),
4550                           TaskArgs);
4551     }
4552     // Check if the parent region is untied and build a return for the untied task.
4553     if (auto *Region =
4554             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4555       Region->emitUntiedSwitch(CGF);
4556   };
4557 
4558   llvm::Value *DepWaitTaskArgs[7];
4559   if (!Data.Dependences.empty()) {
4560     DepWaitTaskArgs[0] = UpLoc;
4561     DepWaitTaskArgs[1] = ThreadID;
4562     DepWaitTaskArgs[2] = NumOfElements;
4563     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4564     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4565     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4566     DepWaitTaskArgs[6] =
4567         llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4568   }
4569   auto &M = CGM.getModule();
4570   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4571                         TaskEntry, &Data, &DepWaitTaskArgs,
4572                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4573     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4574     // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4575     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4576     // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait), if
4577     // dependence info is specified.
4578     if (!Data.Dependences.empty())
4579       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4580                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
4581                           DepWaitTaskArgs);
4582     // Call proxy_task_entry(gtid, new_task);
4583     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4584                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4585       Action.Enter(CGF);
4586       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4587       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4588                                                           OutlinedFnArgs);
4589     };
4590 
4591     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4592     // kmp_task_t *new_task);
4593     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4594     // kmp_task_t *new_task);
4595     RegionCodeGenTy RCG(CodeGen);
4596     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4597                               M, OMPRTL___kmpc_omp_task_begin_if0),
4598                           TaskArgs,
4599                           OMPBuilder.getOrCreateRuntimeFunction(
4600                               M, OMPRTL___kmpc_omp_task_complete_if0),
4601                           TaskArgs);
4602     RCG.setAction(Action);
4603     RCG(CGF);
4604   };
4605 
4606   if (IfCond) {
4607     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4608   } else {
4609     RegionCodeGenTy ThenRCG(ThenCodeGen);
4610     ThenRCG(CGF);
4611   }
4612 }
4613 
4614 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4615                                        const OMPLoopDirective &D,
4616                                        llvm::Function *TaskFunction,
4617                                        QualType SharedsTy, Address Shareds,
4618                                        const Expr *IfCond,
4619                                        const OMPTaskDataTy &Data) {
4620   if (!CGF.HaveInsertPoint())
4621     return;
4622   TaskResultTy Result =
4623       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4624   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4625   // libcall.
4626   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4627   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4628   // sched, kmp_uint64 grainsize, void *task_dup);
4629   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4630   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4631   llvm::Value *IfVal;
4632   if (IfCond) {
4633     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4634                                       /*isSigned=*/true);
4635   } else {
4636     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4637   }
4638 
4639   LValue LBLVal = CGF.EmitLValueForField(
4640       Result.TDBase,
4641       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4642   const auto *LBVar =
4643       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4644   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4645                        LBLVal.getQuals(),
4646                        /*IsInitializer=*/true);
4647   LValue UBLVal = CGF.EmitLValueForField(
4648       Result.TDBase,
4649       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4650   const auto *UBVar =
4651       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4652   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4653                        UBLVal.getQuals(),
4654                        /*IsInitializer=*/true);
4655   LValue StLVal = CGF.EmitLValueForField(
4656       Result.TDBase,
4657       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4658   const auto *StVar =
4659       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4660   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4661                        StLVal.getQuals(),
4662                        /*IsInitializer=*/true);
4663   // Store reductions address.
4664   LValue RedLVal = CGF.EmitLValueForField(
4665       Result.TDBase,
4666       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4667   if (Data.Reductions) {
4668     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4669   } else {
4670     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4671                                CGF.getContext().VoidPtrTy);
4672   }
4673   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
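  // E.g. a 'grainsize(4)' clause is expected to lower to sched == Grainsize
  // with a grainsize argument of 4, and 'num_tasks(8)' to sched == NumTasks
  // with 8; with no schedule clause, sched == NoSchedule and the argument is 0.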
4674   llvm::Value *TaskArgs[] = {
4675       UpLoc,
4676       ThreadID,
4677       Result.NewTask,
4678       IfVal,
4679       LBLVal.getPointer(CGF),
4680       UBLVal.getPointer(CGF),
4681       CGF.EmitLoadOfScalar(StLVal, Loc),
4682       llvm::ConstantInt::getSigned(
4683           CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4684       llvm::ConstantInt::getSigned(
4685           CGF.IntTy, Data.Schedule.getPointer()
4686                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4687                          : NoSchedule),
4688       Data.Schedule.getPointer()
4689           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4690                                       /*isSigned=*/false)
4691           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4692       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4693                              Result.TaskDupFn, CGF.VoidPtrTy)
4694                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4695   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4696                           CGM.getModule(), OMPRTL___kmpc_taskloop),
4697                       TaskArgs);
4698 }
4699 
4700 /// Emit reduction operation for each element of array (required for
4701 /// array sections) LHS op = RHS.
4702 /// \param Type Type of array.
4703 /// \param LHSVar Variable on the left side of the reduction operation
4704 /// (references element of array in original variable).
4705 /// \param RHSVar Variable on the right side of the reduction operation
4706 /// (references element of array in original variable).
4707 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4708 /// RHSVar.
4709 static void EmitOMPAggregateReduction(
4710     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4711     const VarDecl *RHSVar,
4712     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4713                                   const Expr *, const Expr *)> &RedOpGen,
4714     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4715     const Expr *UpExpr = nullptr) {
4716   // Perform the reduction element by element.
4717   QualType ElementTy;
4718   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4719   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4720 
4721   // Drill down to the base element type on both arrays.
4722   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4723   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4724 
4725   llvm::Value *RHSBegin = RHSAddr.getPointer();
4726   llvm::Value *LHSBegin = LHSAddr.getPointer();
4727   // Compute the pointer past the last LHS element.
4728   llvm::Value *LHSEnd =
4729       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4730   // The basic structure here is a while-do loop.
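  // In C terms the emitted loop is roughly (a sketch):
  //   for (T *lhs = LHSBegin, *rhs = RHSBegin; lhs != LHSEnd; ++lhs, ++rhs)
  //     <RedOpGen on *lhs, *rhs>; // e.g. *lhs = *lhs + *rhs for a '+' reduction
  // guarded by an upfront emptiness check.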
4731   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4732   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4733   llvm::Value *IsEmpty =
4734       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4735   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4736 
4737   // Enter the loop body, making that address the current address.
4738   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4739   CGF.EmitBlock(BodyBB);
4740 
4741   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4742 
4743   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4744       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4745   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4746   Address RHSElementCurrent(
4747       RHSElementPHI, RHSAddr.getElementType(),
4748       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749 
4750   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4751       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4752   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4753   Address LHSElementCurrent(
4754       LHSElementPHI, LHSAddr.getElementType(),
4755       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4756 
4757   // Emit copy.
4758   CodeGenFunction::OMPPrivateScope Scope(CGF);
4759   Scope.addPrivate(LHSVar, LHSElementCurrent);
4760   Scope.addPrivate(RHSVar, RHSElementCurrent);
4761   Scope.Privatize();
4762   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4763   Scope.ForceCleanup();
4764 
4765   // Shift the address forward by one element.
4766   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4767       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4768       "omp.arraycpy.dest.element");
4769   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4770       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4771       "omp.arraycpy.src.element");
4772   // Check whether we've reached the end.
4773   llvm::Value *Done =
4774       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4775   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4776   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4777   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4778 
4779   // Done.
4780   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4781 }
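
// As an illustration, for 'reduction(+ : a[0:n])' over elements of type
// double the loop emitted above behaves like this C-style sketch (names are
// illustrative only):
//
//   double *lhs = lhsBegin, *rhs = rhsBegin, *end = lhsBegin + n;
//   if (lhs != end) {
//     do {
//       *lhs = *lhs + *rhs; // RedOpGen applied to the privatized element pair
//       ++lhs;
//       ++rhs;
//     } while (lhs != end);
//   }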
4782 
4783 /// Emit the reduction combiner. If the combiner is a simple expression, emit
4784 /// it as is; otherwise treat it as the combiner of a UDR declaration and emit
4785 /// it as a call to the UDR combiner function.
4786 static void emitReductionCombiner(CodeGenFunction &CGF,
4787                                   const Expr *ReductionOp) {
4788   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4789     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4790       if (const auto *DRE =
4791               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4792         if (const auto *DRD =
4793                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4794           std::pair<llvm::Function *, llvm::Function *> Reduction =
4795               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4796           RValue Func = RValue::get(Reduction.first);
4797           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4798           CGF.EmitIgnoredExpr(ReductionOp);
4799           return;
4800         }
4801   CGF.EmitIgnoredExpr(ReductionOp);
4802 }
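
// For example, given the (hypothetical) declaration
//
//   #pragma omp declare reduction(merge : T : omp_out = foo(omp_out, omp_in))
//
// the ReductionOp for 'reduction(merge : x)' is a CallExpr whose callee is an
// OpaqueValueExpr; the mapping above rebinds that opaque value to the
// previously emitted UDR combiner function before the call is emitted.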
4803 
4804 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4805     StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4806     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4807     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4808   ASTContext &C = CGM.getContext();
4809 
4810   // void reduction_func(void *LHSArg, void *RHSArg);
4811   FunctionArgList Args;
4812   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4813                            ImplicitParamKind::Other);
4814   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4815                            ImplicitParamKind::Other);
4816   Args.push_back(&LHSArg);
4817   Args.push_back(&RHSArg);
4818   const auto &CGFI =
4819       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4820   std::string Name = getReductionFuncName(ReducerName);
4821   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4822                                     llvm::GlobalValue::InternalLinkage, Name,
4823                                     &CGM.getModule());
4824   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4825   Fn->setDoesNotRecurse();
4826   CodeGenFunction CGF(CGM);
4827   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4828 
4829   // LHS = (void*[n])(LHSArg);
4830   // RHS = (void*[n])(RHSArg);
4831   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4833                   ArgsElemType->getPointerTo()),
4834               ArgsElemType, CGF.getPointerAlign());
4835   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4836                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4837                   ArgsElemType->getPointerTo()),
4838               ArgsElemType, CGF.getPointerAlign());
4839 
4840   //  ...
4841   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4842   //  ...
4843   CodeGenFunction::OMPPrivateScope Scope(CGF);
4844   const auto *IPriv = Privates.begin();
4845   unsigned Idx = 0;
4846   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4847     const auto *RHSVar =
4848         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4849     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4850     const auto *LHSVar =
4851         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4852     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4853     QualType PrivTy = (*IPriv)->getType();
4854     if (PrivTy->isVariablyModifiedType()) {
4855       // Get array size and emit VLA type.
4856       ++Idx;
4857       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4858       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4859       const VariableArrayType *VLA =
4860           CGF.getContext().getAsVariableArrayType(PrivTy);
4861       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4862       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4863           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4864       CGF.EmitVariablyModifiedType(PrivTy);
4865     }
4866   }
4867   Scope.Privatize();
4868   IPriv = Privates.begin();
4869   const auto *ILHS = LHSExprs.begin();
4870   const auto *IRHS = RHSExprs.begin();
4871   for (const Expr *E : ReductionOps) {
4872     if ((*IPriv)->getType()->isArrayType()) {
4873       // Emit reduction for array section.
4874       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4875       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4876       EmitOMPAggregateReduction(
4877           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4878           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4879             emitReductionCombiner(CGF, E);
4880           });
4881     } else {
4882       // Emit reduction for array subscript or single variable.
4883       emitReductionCombiner(CGF, E);
4884     }
4885     ++IPriv;
4886     ++ILHS;
4887     ++IRHS;
4888   }
4889   Scope.ForceCleanup();
4890   CGF.FinishFunction();
4891   return Fn;
4892 }
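
// For a single scalar item, e.g. 'reduction(+ : sum)' with 'sum' of type
// double, the function produced above is roughly equivalent to this C-style
// sketch:
//
//   static void reduction_func(void *lhs, void *rhs) {
//     *(double *)((void **)lhs)[0] =
//         *(double *)((void **)lhs)[0] + *(double *)((void **)rhs)[0];
//   }
//
// VLA items additionally consume a second array slot that holds the number of
// elements, as handled in the loop above.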
4893 
4894 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4895                                                   const Expr *ReductionOp,
4896                                                   const Expr *PrivateRef,
4897                                                   const DeclRefExpr *LHS,
4898                                                   const DeclRefExpr *RHS) {
4899   if (PrivateRef->getType()->isArrayType()) {
4900     // Emit reduction for array section.
4901     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4902     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4903     EmitOMPAggregateReduction(
4904         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4905         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4906           emitReductionCombiner(CGF, ReductionOp);
4907         });
4908   } else {
4909     // Emit reduction for array subscript or single variable.
4910     emitReductionCombiner(CGF, ReductionOp);
4911   }
4912 }
4913 
4914 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4915                                     ArrayRef<const Expr *> Privates,
4916                                     ArrayRef<const Expr *> LHSExprs,
4917                                     ArrayRef<const Expr *> RHSExprs,
4918                                     ArrayRef<const Expr *> ReductionOps,
4919                                     ReductionOptionsTy Options) {
4920   if (!CGF.HaveInsertPoint())
4921     return;
4922 
4923   bool WithNowait = Options.WithNowait;
4924   bool SimpleReduction = Options.SimpleReduction;
4925 
4926   // The following code should be emitted for the reduction:
4927   //
4928   // static kmp_critical_name lock = { 0 };
4929   //
4930   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4931   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4932   //  ...
4933   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4934   //  *(Type<n>-1*)rhs[<n>-1]);
4935   // }
4936   //
4937   // ...
4938   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4939   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4940   // RedList, reduce_func, &<lock>)) {
4941   // case 1:
4942   //  ...
4943   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4944   //  ...
4945   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4946   // break;
4947   // case 2:
4948   //  ...
4949   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4950   //  ...
4951   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4952   // break;
4953   // default:;
4954   // }
4955   //
4956   // If SimpleReduction is true, only the following code is generated:
4957   //  ...
4958   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4959   //  ...
4960 
4961   ASTContext &C = CGM.getContext();
4962 
4963   if (SimpleReduction) {
4964     CodeGenFunction::RunCleanupsScope Scope(CGF);
4965     const auto *IPriv = Privates.begin();
4966     const auto *ILHS = LHSExprs.begin();
4967     const auto *IRHS = RHSExprs.begin();
4968     for (const Expr *E : ReductionOps) {
4969       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4970                                   cast<DeclRefExpr>(*IRHS));
4971       ++IPriv;
4972       ++ILHS;
4973       ++IRHS;
4974     }
4975     return;
4976   }
4977 
4978   // 1. Build a list of reduction variables.
4979   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4980   auto Size = RHSExprs.size();
4981   for (const Expr *E : Privates) {
4982     if (E->getType()->isVariablyModifiedType())
4983       // Reserve a slot for the array size.
4984       ++Size;
4985   }
4986   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4987   QualType ReductionArrayTy = C.getConstantArrayType(
4988       C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4989       /*IndexTypeQuals=*/0);
4990   Address ReductionList =
4991       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4992   const auto *IPriv = Privates.begin();
4993   unsigned Idx = 0;
4994   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4995     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4996     CGF.Builder.CreateStore(
4997         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4998             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
4999         Elem);
5000     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5001       // Store array size.
5002       ++Idx;
5003       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5004       llvm::Value *Size = CGF.Builder.CreateIntCast(
5005           CGF.getVLASize(
5006                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5007               .NumElts,
5008           CGF.SizeTy, /*isSigned=*/false);
5009       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5010                               Elem);
5011     }
5012   }
5013 
5014   // 2. Emit reduce_func().
5015   llvm::Function *ReductionFn = emitReductionFunction(
5016       CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5017       Privates, LHSExprs, RHSExprs, ReductionOps);
5018 
5019   // 3. Create static kmp_critical_name lock = { 0 };
5020   std::string Name = getName({"reduction"});
5021   llvm::Value *Lock = getCriticalRegionLock(Name);
5022 
5023   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5024   // RedList, reduce_func, &<lock>);
5025   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5026   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5027   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5028   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5029       ReductionList.getPointer(), CGF.VoidPtrTy);
5030   llvm::Value *Args[] = {
5031       IdentTLoc,                             // ident_t *<loc>
5032       ThreadId,                              // i32 <gtid>
5033       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5034       ReductionArrayTySize,                  // size_type sizeof(RedList)
5035       RL,                                    // void *RedList
5036       ReductionFn, // void (*) (void *, void *) <reduce_func>
5037       Lock         // kmp_critical_name *&<lock>
5038   };
5039   llvm::Value *Res = CGF.EmitRuntimeCall(
5040       OMPBuilder.getOrCreateRuntimeFunction(
5041           CGM.getModule(),
5042           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5043       Args);
5044 
5045   // 5. Build switch(res)
5046   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5047   llvm::SwitchInst *SwInst =
5048       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5049 
5050   // 6. Build case 1:
5051   //  ...
5052   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5053   //  ...
5054   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5055   // break;
5056   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5057   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5058   CGF.EmitBlock(Case1BB);
5059 
5060   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5061   llvm::Value *EndArgs[] = {
5062       IdentTLoc, // ident_t *<loc>
5063       ThreadId,  // i32 <gtid>
5064       Lock       // kmp_critical_name *&<lock>
5065   };
5066   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5067                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5068     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5069     const auto *IPriv = Privates.begin();
5070     const auto *ILHS = LHSExprs.begin();
5071     const auto *IRHS = RHSExprs.begin();
5072     for (const Expr *E : ReductionOps) {
5073       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5074                                      cast<DeclRefExpr>(*IRHS));
5075       ++IPriv;
5076       ++ILHS;
5077       ++IRHS;
5078     }
5079   };
5080   RegionCodeGenTy RCG(CodeGen);
5081   CommonActionTy Action(
5082       nullptr, std::nullopt,
5083       OMPBuilder.getOrCreateRuntimeFunction(
5084           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5085                                       : OMPRTL___kmpc_end_reduce),
5086       EndArgs);
5087   RCG.setAction(Action);
5088   RCG(CGF);
5089 
5090   CGF.EmitBranch(DefaultBB);
5091 
5092   // 7. Build case 2:
5093   //  ...
5094   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5095   //  ...
5096   // break;
5097   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5098   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5099   CGF.EmitBlock(Case2BB);
5100 
5101   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5102                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5103     const auto *ILHS = LHSExprs.begin();
5104     const auto *IRHS = RHSExprs.begin();
5105     const auto *IPriv = Privates.begin();
5106     for (const Expr *E : ReductionOps) {
5107       const Expr *XExpr = nullptr;
5108       const Expr *EExpr = nullptr;
5109       const Expr *UpExpr = nullptr;
5110       BinaryOperatorKind BO = BO_Comma;
5111       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5112         if (BO->getOpcode() == BO_Assign) {
5113           XExpr = BO->getLHS();
5114           UpExpr = BO->getRHS();
5115         }
5116       }
5117       // Try to emit update expression as a simple atomic.
5118       const Expr *RHSExpr = UpExpr;
5119       if (RHSExpr) {
5120         // Analyze RHS part of the whole expression.
5121         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5122                 RHSExpr->IgnoreParenImpCasts())) {
5123           // If this is a conditional operator, analyze its condition for a
5124           // min/max reduction operator.
5125           RHSExpr = ACO->getCond();
5126         }
5127         if (const auto *BORHS =
5128                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5129           EExpr = BORHS->getRHS();
5130           BO = BORHS->getOpcode();
5131         }
5132       }
5133       if (XExpr) {
5134         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5135         auto &&AtomicRedGen = [BO, VD,
5136                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5137                                     const Expr *EExpr, const Expr *UpExpr) {
5138           LValue X = CGF.EmitLValue(XExpr);
5139           RValue E;
5140           if (EExpr)
5141             E = CGF.EmitAnyExpr(EExpr);
5142           CGF.EmitOMPAtomicSimpleUpdateExpr(
5143               X, E, BO, /*IsXLHSInRHSPart=*/true,
5144               llvm::AtomicOrdering::Monotonic, Loc,
5145               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5146                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5147                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5148                 CGF.emitOMPSimpleStore(
5149                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5150                     VD->getType().getNonReferenceType(), Loc);
5151                 PrivateScope.addPrivate(VD, LHSTemp);
5152                 (void)PrivateScope.Privatize();
5153                 return CGF.EmitAnyExpr(UpExpr);
5154               });
5155         };
5156         if ((*IPriv)->getType()->isArrayType()) {
5157           // Emit atomic reduction for array section.
5158           const auto *RHSVar =
5159               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5160           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5161                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5162         } else {
5163           // Emit atomic reduction for array subscript or single variable.
5164           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5165         }
5166       } else {
5167         // Emit as a critical region.
5168         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5169                                      const Expr *, const Expr *) {
5170           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5171           std::string Name = RT.getName({"atomic_reduction"});
5172           RT.emitCriticalRegion(
5173               CGF, Name,
5174               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5175                 Action.Enter(CGF);
5176                 emitReductionCombiner(CGF, E);
5177               },
5178               Loc);
5179         };
5180         if ((*IPriv)->getType()->isArrayType()) {
5181           const auto *LHSVar =
5182               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5183           const auto *RHSVar =
5184               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5185           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5186                                     CritRedGen);
5187         } else {
5188           CritRedGen(CGF, nullptr, nullptr, nullptr);
5189         }
5190       }
5191       ++ILHS;
5192       ++IRHS;
5193       ++IPriv;
5194     }
5195   };
5196   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5197   if (!WithNowait) {
5198     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5199     llvm::Value *EndArgs[] = {
5200         IdentTLoc, // ident_t *<loc>
5201         ThreadId,  // i32 <gtid>
5202         Lock       // kmp_critical_name *&<lock>
5203     };
5204     CommonActionTy Action(nullptr, std::nullopt,
5205                           OMPBuilder.getOrCreateRuntimeFunction(
5206                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5207                           EndArgs);
5208     AtomicRCG.setAction(Action);
5209     AtomicRCG(CGF);
5210   } else {
5211     AtomicRCG(CGF);
5212   }
5213 
5214   CGF.EmitBranch(DefaultBB);
5215   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5216 }
5217 
5218 /// Generates a unique name for artificial threadprivate variables.
5219 /// Format is: <Prefix> "." <Decl_mangled_name> "_" <Decl_start_loc_raw_enc>
5220 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5221                                       const Expr *Ref) {
5222   SmallString<256> Buffer;
5223   llvm::raw_svector_ostream Out(Buffer);
5224   const clang::DeclRefExpr *DE;
5225   const VarDecl *D = ::getBaseDecl(Ref, DE);
5226   if (!D)
5227     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5228   D = D->getCanonicalDecl();
5229   std::string Name = CGM.getOpenMPRuntime().getName(
5230       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5231   Out << Prefix << Name << "_"
5232       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5233   return std::string(Out.str());
5234 }
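
// For example, when called below with the "reduction_size" prefix for a local
// variable 'a', this produces something like "reduction_size.a_<rawloc>",
// where <rawloc> is the raw encoding of the declaration's begin location and
// the separators come from CGOpenMPRuntime::getName.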
5235 
5236 /// Emits reduction initializer function:
5237 /// \code
5238 /// void @.red_init(void* %arg, void* %orig) {
5239 /// %0 = bitcast void* %arg to <type>*
5240 /// store <type> <init>, <type>* %0
5241 /// ret void
5242 /// }
5243 /// \endcode
5244 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5245                                            SourceLocation Loc,
5246                                            ReductionCodeGen &RCG, unsigned N) {
5247   ASTContext &C = CGM.getContext();
5248   QualType VoidPtrTy = C.VoidPtrTy;
5249   VoidPtrTy.addRestrict();
5250   FunctionArgList Args;
5251   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5252                           ImplicitParamKind::Other);
5253   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5254                               ImplicitParamKind::Other);
5255   Args.emplace_back(&Param);
5256   Args.emplace_back(&ParamOrig);
5257   const auto &FnInfo =
5258       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5259   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5260   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5261   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5262                                     Name, &CGM.getModule());
5263   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5264   Fn->setDoesNotRecurse();
5265   CodeGenFunction CGF(CGM);
5266   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5267   QualType PrivateType = RCG.getPrivateType(N);
5268   Address PrivateAddr = CGF.EmitLoadOfPointer(
5269       CGF.GetAddrOfLocalVar(&Param).withElementType(
5270           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5271       C.getPointerType(PrivateType)->castAs<PointerType>());
5272   llvm::Value *Size = nullptr;
5273   // If the size of the reduction item is non-constant, load it from the
5274   // global threadprivate variable.
5275   if (RCG.getSizes(N).second) {
5276     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5277         CGF, CGM.getContext().getSizeType(),
5278         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5279     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5280                                 CGM.getContext().getSizeType(), Loc);
5281   }
5282   RCG.emitAggregateType(CGF, N, Size);
5283   Address OrigAddr = Address::invalid();
5284   // If the initializer uses the initializer from the declare reduction
5285   // construct, emit a pointer to the address of the original reduction item
5286   // (required by the reduction initializer).
5287   if (RCG.usesReductionInitializer(N)) {
5288     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5289     OrigAddr = CGF.EmitLoadOfPointer(
5290         SharedAddr,
5291         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5292   }
5293   // Emit the initializer:
5294   // %0 = bitcast void* %arg to <type>*
5295   // store <type> <init>, <type>* %0
5296   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5297                          [](CodeGenFunction &) { return false; });
5298   CGF.FinishFunction();
5299   return Fn;
5300 }
5301 
5302 /// Emits reduction combiner function:
5303 /// \code
5304 /// void @.red_comb(void* %arg0, void* %arg1) {
5305 /// %lhs = bitcast void* %arg0 to <type>*
5306 /// %rhs = bitcast void* %arg1 to <type>*
5307 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5308 /// store <type> %2, <type>* %lhs
5309 /// ret void
5310 /// }
5311 /// \endcode
5312 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5313                                            SourceLocation Loc,
5314                                            ReductionCodeGen &RCG, unsigned N,
5315                                            const Expr *ReductionOp,
5316                                            const Expr *LHS, const Expr *RHS,
5317                                            const Expr *PrivateRef) {
5318   ASTContext &C = CGM.getContext();
5319   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5320   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5321   FunctionArgList Args;
5322   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5323                                C.VoidPtrTy, ImplicitParamKind::Other);
5324   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5325                             ImplicitParamKind::Other);
5326   Args.emplace_back(&ParamInOut);
5327   Args.emplace_back(&ParamIn);
5328   const auto &FnInfo =
5329       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5330   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5331   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5332   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5333                                     Name, &CGM.getModule());
5334   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5335   Fn->setDoesNotRecurse();
5336   CodeGenFunction CGF(CGM);
5337   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5338   llvm::Value *Size = nullptr;
5339   // If the size of the reduction item is non-constant, load it from the
5340   // global threadprivate variable.
5341   if (RCG.getSizes(N).second) {
5342     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5343         CGF, CGM.getContext().getSizeType(),
5344         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5345     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5346                                 CGM.getContext().getSizeType(), Loc);
5347   }
5348   RCG.emitAggregateType(CGF, N, Size);
5349   // Remap lhs and rhs variables to the addresses of the function arguments.
5350   // %lhs = bitcast void* %arg0 to <type>*
5351   // %rhs = bitcast void* %arg1 to <type>*
5352   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5353   PrivateScope.addPrivate(
5354       LHSVD,
5355       // Pull out the pointer to the variable.
5356       CGF.EmitLoadOfPointer(
5357           CGF.GetAddrOfLocalVar(&ParamInOut)
5358               .withElementType(
5359                   CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5360           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5361   PrivateScope.addPrivate(
5362       RHSVD,
5363       // Pull out the pointer to the variable.
5364       CGF.EmitLoadOfPointer(
5365           CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5366               CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5367           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5368   PrivateScope.Privatize();
5369   // Emit the combiner body:
5370   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5371   // store <type> %2, <type>* %lhs
5372   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5373       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5374       cast<DeclRefExpr>(RHS));
5375   CGF.FinishFunction();
5376   return Fn;
5377 }
5378 
5379 /// Emits reduction finalizer function:
5380 /// \code
5381 /// void @.red_fini(void* %arg) {
5382 /// %0 = bitcast void* %arg to <type>*
5383 /// <destroy>(<type>* %0)
5384 /// ret void
5385 /// }
5386 /// \endcode
5387 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5388                                            SourceLocation Loc,
5389                                            ReductionCodeGen &RCG, unsigned N) {
5390   if (!RCG.needCleanups(N))
5391     return nullptr;
5392   ASTContext &C = CGM.getContext();
5393   FunctionArgList Args;
5394   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5395                           ImplicitParamKind::Other);
5396   Args.emplace_back(&Param);
5397   const auto &FnInfo =
5398       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5399   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5400   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5401   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5402                                     Name, &CGM.getModule());
5403   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5404   Fn->setDoesNotRecurse();
5405   CodeGenFunction CGF(CGM);
5406   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5407   Address PrivateAddr = CGF.EmitLoadOfPointer(
5408       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5409   llvm::Value *Size = nullptr;
5410   // If the size of the reduction item is non-constant, load it from the
5411   // global threadprivate variable.
5412   if (RCG.getSizes(N).second) {
5413     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5414         CGF, CGM.getContext().getSizeType(),
5415         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5416     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5417                                 CGM.getContext().getSizeType(), Loc);
5418   }
5419   RCG.emitAggregateType(CGF, N, Size);
5420   // Emit the finalizer body:
5421   // <destroy>(<type>* %0)
5422   RCG.emitCleanups(CGF, N, PrivateAddr);
5423   CGF.FinishFunction(Loc);
5424   return Fn;
5425 }
5426 
5427 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5428     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5429     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5430   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5431     return nullptr;
5432 
5433   // Build typedef struct:
5434   // kmp_taskred_input {
5435   //   void *reduce_shar; // shared reduction item
5436   //   void *reduce_orig; // original reduction item used for initialization
5437   //   size_t reduce_size; // size of data item
5438   //   void *reduce_init; // data initialization routine
5439   //   void *reduce_fini; // data finalization routine
5440   //   void *reduce_comb; // data combiner routine
5441   //   kmp_taskred_flags_t flags; // flags for additional info from compiler
5442   // } kmp_taskred_input_t;
5443   ASTContext &C = CGM.getContext();
5444   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5445   RD->startDefinition();
5446   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5447   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5448   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5449   const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5451   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5452   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5453       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5454   RD->completeDefinition();
5455   QualType RDType = C.getRecordType(RD);
5456   unsigned Size = Data.ReductionVars.size();
5457   llvm::APInt ArraySize(/*numBits=*/64, Size);
5458   QualType ArrayRDType =
5459       C.getConstantArrayType(RDType, ArraySize, nullptr,
5460                              ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5461   // kmp_taskred_input_t .rd_input.[Size];
5462   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5463   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5464                        Data.ReductionCopies, Data.ReductionOps);
5465   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5466     // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5467     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5468                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5469     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5470         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5471         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5472         ".rd_input.gep.");
5473     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5474     // ElemLVal.reduce_shar = &Shareds[Cnt];
5475     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5476     RCG.emitSharedOrigLValue(CGF, Cnt);
5477     llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5478     CGF.EmitStoreOfScalar(Shared, SharedLVal);
5479     // ElemLVal.reduce_orig = &Origs[Cnt];
5480     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5481     llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5482     CGF.EmitStoreOfScalar(Orig, OrigLVal);
5483     RCG.emitAggregateType(CGF, Cnt);
5484     llvm::Value *SizeValInChars;
5485     llvm::Value *SizeVal;
5486     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5487     // We use delayed creation/initialization for VLAs and array sections. It
5488     // is required because the runtime does not provide a way to pass the
5489     // sizes of VLAs/array sections to the initializer/combiner/finalizer
5490     // functions. Instead, threadprivate global variables are used to store
5491     // these values and make them available to those functions.
5492     bool DelayedCreation = !!SizeVal;
5493     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5494                                                /*isSigned=*/false);
5495     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5496     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5497     // ElemLVal.reduce_init = init;
5498     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5499     llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5500     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5501     // ElemLVal.reduce_fini = fini;
5502     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5503     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5504     llvm::Value *FiniAddr =
5505         Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5506     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5507     // ElemLVal.reduce_comb = comb;
5508     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5509     llvm::Value *CombAddr = emitReduceCombFunction(
5510         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5511         RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5512     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5513     // ElemLVal.flags = 0;
5514     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5515     if (DelayedCreation) {
5516       CGF.EmitStoreOfScalar(
5517           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5518           FlagsLVal);
5519     } else
5520       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5521                                  FlagsLVal.getType());
5522   }
5523   if (Data.IsReductionWithTaskMod) {
5524     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5525     // is_ws, int num, void *data);
5526     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5527     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5528                                                   CGM.IntTy, /*isSigned=*/true);
5529     llvm::Value *Args[] = {
5530         IdentTLoc, GTid,
5531         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5532                                /*isSigned=*/true),
5533         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5534         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5535             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5536     return CGF.EmitRuntimeCall(
5537         OMPBuilder.getOrCreateRuntimeFunction(
5538             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5539         Args);
5540   }
5541   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5542   llvm::Value *Args[] = {
5543       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5544                                 /*isSigned=*/true),
5545       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5546       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5547                                                       CGM.VoidPtrTy)};
5548   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5549                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
5550                              Args);
5551 }
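
// As an illustration, for
//
//   #pragma omp taskgroup task_reduction(+ : x)
//
// one kmp_taskred_input_t element is filled in for 'x' (pointing at the
// .red_init., .red_comb. and optional .red_fini. helpers built above) and the
// emitted call is roughly:
//
//   tg = __kmpc_taskred_init(gtid, /*num_data=*/1, /*data=*/&.rd_input.[0]);
//
// a sketch only; with the 'task' reduction modifier the
// __kmpc_taskred_modifier_init entry point is used instead.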
5552 
5553 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5554                                             SourceLocation Loc,
5555                                             bool IsWorksharingReduction) {
5556   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
5557   // int gtid, int is_ws);
5558   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5559   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5560                                                 CGM.IntTy, /*isSigned=*/true);
5561   llvm::Value *Args[] = {IdentTLoc, GTid,
5562                          llvm::ConstantInt::get(CGM.IntTy,
5563                                                 IsWorksharingReduction ? 1 : 0,
5564                                                 /*isSigned=*/true)};
5565   (void)CGF.EmitRuntimeCall(
5566       OMPBuilder.getOrCreateRuntimeFunction(
5567           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5568       Args);
5569 }
5570 
5571 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5572                                               SourceLocation Loc,
5573                                               ReductionCodeGen &RCG,
5574                                               unsigned N) {
5575   auto Sizes = RCG.getSizes(N);
5576   // Emit a threadprivate global variable if the size is non-constant
5577   // (Sizes.second != nullptr).
5578   if (Sizes.second) {
5579     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5580                                                      /*isSigned=*/false);
5581     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5582         CGF, CGM.getContext().getSizeType(),
5583         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5584     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5585   }
5586 }
5587 
5588 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5589                                               SourceLocation Loc,
5590                                               llvm::Value *ReductionsPtr,
5591                                               LValue SharedLVal) {
5592   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5593   // *d);
5594   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5595                                                    CGM.IntTy,
5596                                                    /*isSigned=*/true),
5597                          ReductionsPtr,
5598                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5599                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5600   return Address(
5601       CGF.EmitRuntimeCall(
5602           OMPBuilder.getOrCreateRuntimeFunction(
5603               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5604           Args),
5605       CGF.Int8Ty, SharedLVal.getAlignment());
5606 }
5607 
5608 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5609                                        const OMPTaskDataTy &Data) {
5610   if (!CGF.HaveInsertPoint())
5611     return;
5612 
5613   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5614     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5615     OMPBuilder.createTaskwait(CGF.Builder);
5616   } else {
5617     llvm::Value *ThreadID = getThreadID(CGF, Loc);
5618     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5619     auto &M = CGM.getModule();
5620     Address DependenciesArray = Address::invalid();
5621     llvm::Value *NumOfElements;
5622     std::tie(NumOfElements, DependenciesArray) =
5623         emitDependClause(CGF, Data.Dependences, Loc);
5624     if (!Data.Dependences.empty()) {
5625       llvm::Value *DepWaitTaskArgs[7];
5626       DepWaitTaskArgs[0] = UpLoc;
5627       DepWaitTaskArgs[1] = ThreadID;
5628       DepWaitTaskArgs[2] = NumOfElements;
5629       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5630       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5631       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5632       DepWaitTaskArgs[6] =
5633           llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5634 
5635       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5636 
5637       // Build call void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32
5638       // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5639       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, kmp_int32
5640       // has_no_wait), used because dependence info is specified.
5641       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5642                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
5643                           DepWaitTaskArgs);
5644 
5645     } else {
5646 
5647       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5648       // global_tid);
5649       llvm::Value *Args[] = {UpLoc, ThreadID};
5650       // Ignore return result until untied tasks are supported.
5651       CGF.EmitRuntimeCall(
5652           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5653           Args);
5654     }
5655   }
5656 
5657   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5658     Region->emitUntiedSwitch(CGF);
5659 }
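
// For example, '#pragma omp taskwait depend(in : x)' takes the dependence
// branch above and emits, roughly (a sketch only):
//
//   __kmpc_omp_taskwait_deps_51(&loc, gtid, /*ndeps=*/1, dep_list,
//                               /*ndeps_noalias=*/0,
//                               /*noalias_dep_list=*/NULL,
//                               /*has_no_wait=*/0);
//
// while a plain '#pragma omp taskwait' emits __kmpc_omp_taskwait(&loc, gtid),
// or goes through the OpenMPIRBuilder when it is enabled.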
5660 
5661 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5662                                            OpenMPDirectiveKind InnerKind,
5663                                            const RegionCodeGenTy &CodeGen,
5664                                            bool HasCancel) {
5665   if (!CGF.HaveInsertPoint())
5666     return;
5667   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5668                                  InnerKind != OMPD_critical &&
5669                                      InnerKind != OMPD_master &&
5670                                      InnerKind != OMPD_masked);
5671   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5672 }
5673 
5674 namespace {
5675 enum RTCancelKind {
5676   CancelNoreq = 0,
5677   CancelParallel = 1,
5678   CancelLoop = 2,
5679   CancelSections = 3,
5680   CancelTaskgroup = 4
5681 };
5682 } // anonymous namespace
5683 
5684 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5685   RTCancelKind CancelKind = CancelNoreq;
5686   if (CancelRegion == OMPD_parallel)
5687     CancelKind = CancelParallel;
5688   else if (CancelRegion == OMPD_for)
5689     CancelKind = CancelLoop;
5690   else if (CancelRegion == OMPD_sections)
5691     CancelKind = CancelSections;
5692   else {
5693     assert(CancelRegion == OMPD_taskgroup);
5694     CancelKind = CancelTaskgroup;
5695   }
5696   return CancelKind;
5697 }
5698 
5699 void CGOpenMPRuntime::emitCancellationPointCall(
5700     CodeGenFunction &CGF, SourceLocation Loc,
5701     OpenMPDirectiveKind CancelRegion) {
5702   if (!CGF.HaveInsertPoint())
5703     return;
5704   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5705   // global_tid, kmp_int32 cncl_kind);
5706   if (auto *OMPRegionInfo =
5707           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5708     // For 'cancellation point taskgroup', the task region info may not have a
5709     // cancel. This may instead happen in another adjacent task.
5710     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5711       llvm::Value *Args[] = {
5712           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5713           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5714       // Check the result: nonzero means cancellation was activated.
5715       llvm::Value *Result = CGF.EmitRuntimeCall(
5716           OMPBuilder.getOrCreateRuntimeFunction(
5717               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5718           Args);
5719       // if (__kmpc_cancellationpoint()) {
5720       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5721       //   exit from construct;
5722       // }
5723       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5724       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5725       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5726       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5727       CGF.EmitBlock(ExitBB);
5728       if (CancelRegion == OMPD_parallel)
5729         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5730       // exit from construct;
5731       CodeGenFunction::JumpDest CancelDest =
5732           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5733       CGF.EmitBranchThroughCleanup(CancelDest);
5734       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5735     }
5736   }
5737 }
5738 
5739 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5740                                      const Expr *IfCond,
5741                                      OpenMPDirectiveKind CancelRegion) {
5742   if (!CGF.HaveInsertPoint())
5743     return;
5744   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5745   // kmp_int32 cncl_kind);
5746   auto &M = CGM.getModule();
5747   if (auto *OMPRegionInfo =
5748           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5749     auto &&ThenGen = [this, &M, Loc, CancelRegion,
5750                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5751       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5752       llvm::Value *Args[] = {
5753           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5754           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5755       // Check the result: nonzero means cancellation was activated.
5756       llvm::Value *Result = CGF.EmitRuntimeCall(
5757           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5758       // if (__kmpc_cancel()) {
5759       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5760       //   exit from construct;
5761       // }
5762       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5763       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5764       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5765       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5766       CGF.EmitBlock(ExitBB);
5767       if (CancelRegion == OMPD_parallel)
5768         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5769       // exit from construct;
5770       CodeGenFunction::JumpDest CancelDest =
5771           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5772       CGF.EmitBranchThroughCleanup(CancelDest);
5773       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5774     };
5775     if (IfCond) {
5776       emitIfClause(CGF, IfCond, ThenGen,
5777                    [](CodeGenFunction &, PrePostActionTy &) {});
5778     } else {
5779       RegionCodeGenTy ThenRCG(ThenGen);
5780       ThenRCG(CGF);
5781     }
5782   }
5783 }
5784 
5785 namespace {
5786 /// Cleanup action for uses_allocators support.
5787 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5788   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5789 
5790 public:
5791   OMPUsesAllocatorsActionTy(
5792       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5793       : Allocators(Allocators) {}
5794   void Enter(CodeGenFunction &CGF) override {
5795     if (!CGF.HaveInsertPoint())
5796       return;
5797     for (const auto &AllocatorData : Allocators) {
5798       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5799           CGF, AllocatorData.first, AllocatorData.second);
5800     }
5801   }
5802   void Exit(CodeGenFunction &CGF) override {
5803     if (!CGF.HaveInsertPoint())
5804       return;
5805     for (const auto &AllocatorData : Allocators) {
5806       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5807                                                         AllocatorData.first);
5808     }
5809   }
5810 };
5811 } // namespace
5812 
5813 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5814     const OMPExecutableDirective &D, StringRef ParentName,
5815     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5816     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5817   assert(!ParentName.empty() && "Invalid target entry parent name!");
5818   HasEmittedTargetRegion = true;
5819   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5820   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5821     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5822       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5823       if (!D.AllocatorTraits)
5824         continue;
5825       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5826     }
5827   }
5828   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5829   CodeGen.setAction(UsesAllocatorAction);
5830   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5831                                    IsOffloadEntry, CodeGen);
5832 }
5833 
5834 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5835                                              const Expr *Allocator,
5836                                              const Expr *AllocatorTraits) {
5837   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5838   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5839   // Use default memspace handle.
5840   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5841   llvm::Value *NumTraits = llvm::ConstantInt::get(
5842       CGF.IntTy, cast<ConstantArrayType>(
5843                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5844                      ->getSize()
5845                      .getLimitedValue());
5846   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5847   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5848       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5849   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5850                                            AllocatorTraitsLVal.getBaseInfo(),
5851                                            AllocatorTraitsLVal.getTBAAInfo());
5852   llvm::Value *Traits = Addr.getPointer();
5853 
5854   llvm::Value *AllocatorVal =
5855       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5856                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
5857                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
5858   // Store to allocator.
5859   CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5860       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5861   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5862   AllocatorVal =
5863       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5864                                Allocator->getType(), Allocator->getExprLoc());
5865   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5866 }
5867 
5868 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5869                                              const Expr *Allocator) {
5870   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5871   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5872   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5873   llvm::Value *AllocatorVal =
5874       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5875   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5876                                           CGF.getContext().VoidPtrTy,
5877                                           Allocator->getExprLoc());
5878   (void)CGF.EmitRuntimeCall(
5879       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5880                                             OMPRTL___kmpc_destroy_allocator),
5881       {ThreadId, AllocatorVal});
5882 }
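
// Taken together, for a region such as (hypothetical names)
//
//   #pragma omp target uses_allocators(my_alloc(my_traits))
//
// the action class above brackets the region body with, roughly:
//
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/NULL,
//                                    /*ntraits=*/N, &my_traits); // on entry
//   ... region body ...
//   __kmpc_destroy_allocator(gtid, my_alloc);                    // on exit
//
// where N is the length of the traits array, as computed in the init helper.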
5883 
5884 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5885     const OMPExecutableDirective &D, CodeGenFunction &CGF,
5886     int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5887     int32_t &MaxTeamsVal) {
5888 
5889   getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5890   getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5891                                       /*UpperBoundOnly=*/true);
5892 
5893   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5894     for (auto *A : C->getAttrs()) {
5895       int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5896       int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5897       if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5898         CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5899                                        &AttrMinBlocksVal, &AttrMaxBlocksVal);
5900       else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5901         CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5902             nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5903             &AttrMaxThreadsVal);
5904       else
5905         continue;
5906 
5907       MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5908       if (AttrMaxThreadsVal > 0)
5909         MaxThreadsVal = MaxThreadsVal > 0
5910                             ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5911                             : AttrMaxThreadsVal;
5912       MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5913       if (AttrMaxBlocksVal > 0)
5914         MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5915                                       : AttrMaxBlocksVal;
5916     }
5917   }
5918 }
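
// For example (a sketch, not normative), with an attribute-based launch
// bound:
//
//   #pragma omp target ompx_attribute(__attribute__((launch_bounds(128, 4))))
//   { ... }
//
// AttrMaxThreadsVal = 128 and AttrMinBlocksVal = 4, so a previously computed
// MaxThreadsVal of 256 tightens to min(256, 128) = 128 and MinTeamsVal is
// raised to max(MinTeamsVal, 4).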
5919 
5920 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5921     const OMPExecutableDirective &D, StringRef ParentName,
5922     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5923     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5924 
5925   llvm::TargetRegionEntryInfo EntryInfo =
5926       getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5927 
5928   CodeGenFunction CGF(CGM, true);
5929   llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5930       [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5931         const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5932 
5933         CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5934         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5935         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5936       };
5937 
5938   OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5939                                       IsOffloadEntry, OutlinedFn, OutlinedFnID);
5940 
5941   if (!OutlinedFn)
5942     return;
5943 
5944   CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5945 
5946   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5947     for (auto *A : C->getAttrs()) {
5948       if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5949         CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5950     }
5951   }
5952 }
5953 
5954 /// Checks if the expression is constant or does not have non-trivial function
5955 /// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5957   // We can skip constant expressions.
5958   // We can skip expressions with trivial calls or simple expressions.
5959   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5960           !E->hasNonTrivialCall(Ctx)) &&
5961          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5962 }
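
// For instance, in the following target body only the call is non-trivial
// (a sketch; assume foo() has a non-trivial definition):
//
//   #pragma omp target
//   {
//     (void)(3 + 4); // evaluatable constant expression - trivial
//     foo();         // non-trivial call - not skippable
//   }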
5963 
5964 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5965                                                     const Stmt *Body) {
5966   const Stmt *Child = Body->IgnoreContainers();
5967   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5968     Child = nullptr;
5969     for (const Stmt *S : C->body()) {
5970       if (const auto *E = dyn_cast<Expr>(S)) {
5971         if (isTrivial(Ctx, E))
5972           continue;
5973       }
5974       // Some of the statements can be ignored.
5975       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5976           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5977         continue;
5978       // Analyze declarations.
5979       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5980         if (llvm::all_of(DS->decls(), [](const Decl *D) {
5981               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5982                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5983                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5984                   isa<UsingDirectiveDecl>(D) ||
5985                   isa<OMPDeclareReductionDecl>(D) ||
5986                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5987                 return true;
5988               const auto *VD = dyn_cast<VarDecl>(D);
5989               if (!VD)
5990                 return false;
5991               return VD->hasGlobalStorage() || !VD->isUsed();
5992             }))
5993           continue;
5994       }
5995       // Found multiple children - cannot get the one child only.
5996       if (Child)
5997         return nullptr;
5998       Child = S;
5999     }
6000     if (Child)
6001       Child = Child->IgnoreContainers();
6002   }
6003   return Child;
6004 }
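
// Example (sketch): for the body below the single compound child is the
// 'for' statement, since the null statement and the unused local declaration
// are both ignorable:
//
//   {
//     ;                        // NullStmt - ignored
//     int Unused;              // declared but never used - ignored
//     for (int i = 0; i < N; ++i)
//       Body(i);               // the single interesting child
//   }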
6005 
6006 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6007     CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6008     int32_t &MaxTeamsVal) {
6009 
6010   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6011   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6012          "Expected target-based executable directive.");
6013   switch (DirectiveKind) {
6014   case OMPD_target: {
6015     const auto *CS = D.getInnermostCapturedStmt();
6016     const auto *Body =
6017         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6018     const Stmt *ChildStmt =
6019         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6020     if (const auto *NestedDir =
6021             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6022       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6023         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6024           const Expr *NumTeams =
6025               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6026           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6027             if (auto Constant =
6028                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6029               MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6030           return NumTeams;
6031         }
6032         MinTeamsVal = MaxTeamsVal = 0;
6033         return nullptr;
6034       }
      // Any other nested directive (parallel, simd, or otherwise) runs within
      // a single team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
6042     }
    // A value of -1 is used to signal that no teams region needs to be
    // emitted.
6044     MinTeamsVal = MaxTeamsVal = -1;
6045     return nullptr;
6046   }
6047   case OMPD_target_teams_loop:
6048   case OMPD_target_teams:
6049   case OMPD_target_teams_distribute:
6050   case OMPD_target_teams_distribute_simd:
6051   case OMPD_target_teams_distribute_parallel_for:
6052   case OMPD_target_teams_distribute_parallel_for_simd: {
6053     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6054       const Expr *NumTeams =
6055           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6056       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6057         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6058           MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6059       return NumTeams;
6060     }
6061     MinTeamsVal = MaxTeamsVal = 0;
6062     return nullptr;
6063   }
6064   case OMPD_target_parallel:
6065   case OMPD_target_parallel_for:
6066   case OMPD_target_parallel_for_simd:
6067   case OMPD_target_parallel_loop:
6068   case OMPD_target_simd:
6069     MinTeamsVal = MaxTeamsVal = 1;
6070     return nullptr;
6071   case OMPD_parallel:
6072   case OMPD_for:
6073   case OMPD_parallel_for:
6074   case OMPD_parallel_loop:
6075   case OMPD_parallel_master:
6076   case OMPD_parallel_sections:
6077   case OMPD_for_simd:
6078   case OMPD_parallel_for_simd:
6079   case OMPD_cancel:
6080   case OMPD_cancellation_point:
6081   case OMPD_ordered:
6082   case OMPD_threadprivate:
6083   case OMPD_allocate:
6084   case OMPD_task:
6085   case OMPD_simd:
6086   case OMPD_tile:
6087   case OMPD_unroll:
6088   case OMPD_sections:
6089   case OMPD_section:
6090   case OMPD_single:
6091   case OMPD_master:
6092   case OMPD_critical:
6093   case OMPD_taskyield:
6094   case OMPD_barrier:
6095   case OMPD_taskwait:
6096   case OMPD_taskgroup:
6097   case OMPD_atomic:
6098   case OMPD_flush:
6099   case OMPD_depobj:
6100   case OMPD_scan:
6101   case OMPD_teams:
6102   case OMPD_target_data:
6103   case OMPD_target_exit_data:
6104   case OMPD_target_enter_data:
6105   case OMPD_distribute:
6106   case OMPD_distribute_simd:
6107   case OMPD_distribute_parallel_for:
6108   case OMPD_distribute_parallel_for_simd:
6109   case OMPD_teams_distribute:
6110   case OMPD_teams_distribute_simd:
6111   case OMPD_teams_distribute_parallel_for:
6112   case OMPD_teams_distribute_parallel_for_simd:
6113   case OMPD_target_update:
6114   case OMPD_declare_simd:
6115   case OMPD_declare_variant:
6116   case OMPD_begin_declare_variant:
6117   case OMPD_end_declare_variant:
6118   case OMPD_declare_target:
6119   case OMPD_end_declare_target:
6120   case OMPD_declare_reduction:
6121   case OMPD_declare_mapper:
6122   case OMPD_taskloop:
6123   case OMPD_taskloop_simd:
6124   case OMPD_master_taskloop:
6125   case OMPD_master_taskloop_simd:
6126   case OMPD_parallel_master_taskloop:
6127   case OMPD_parallel_master_taskloop_simd:
6128   case OMPD_requires:
6129   case OMPD_metadirective:
6130   case OMPD_unknown:
6131     break;
6132   default:
6133     break;
6134   }
6135   llvm_unreachable("Unexpected directive kind.");
6136 }
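
// Representative results (sketch):
//
//   #pragma omp target teams num_teams(8)  // returns the '8' expression and
//                                          // sets MinTeamsVal = MaxTeamsVal = 8
//   #pragma omp target parallel            // returns nullptr,
//                                          // MinTeamsVal = MaxTeamsVal = 1
//   #pragma omp target                     // no nested teams: returns nullptr,
//   { foo(); }                             // MinTeamsVal = MaxTeamsVal = -1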
6137 
6138 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6139     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6140   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6141          "Clauses associated with the teams directive expected to be emitted "
6142          "only for the host!");
6143   CGBuilderTy &Bld = CGF.Builder;
6144   int32_t MinNT = -1, MaxNT = -1;
6145   const Expr *NumTeams =
6146       getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6147   if (NumTeams != nullptr) {
6148     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6149 
6150     switch (DirectiveKind) {
6151     case OMPD_target: {
6152       const auto *CS = D.getInnermostCapturedStmt();
6153       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6154       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign=*/true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, /*isSigned=*/true);
6159     }
6160     case OMPD_target_teams:
6161     case OMPD_target_teams_distribute:
6162     case OMPD_target_teams_distribute_simd:
6163     case OMPD_target_teams_distribute_parallel_for:
6164     case OMPD_target_teams_distribute_parallel_for_simd: {
6165       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign=*/true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, /*isSigned=*/true);
6170     }
6171     default:
6172       break;
6173     }
6174   }
6175 
  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6177   return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6178 }
6179 
/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
/// corresponding expression evaluation is not performed.
6184 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6185                           const Expr **E, int32_t &UpperBound,
6186                           bool UpperBoundOnly, llvm::Value **CondVal) {
6187   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6188       CGF.getContext(), CS->getCapturedStmt());
6189   const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6190   if (!Dir)
6191     return;
6192 
6193   if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle the if clause. If the if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6196     if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6197       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6198       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6199       const OMPIfClause *IfClause = nullptr;
6200       for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6201         if (C->getNameModifier() == OMPD_unknown ||
6202             C->getNameModifier() == OMPD_parallel) {
6203           IfClause = C;
6204           break;
6205         }
6206       }
6207       if (IfClause) {
6208         const Expr *CondExpr = IfClause->getCondition();
6209         bool Result;
6210         if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6211           if (!Result) {
6212             UpperBound = 1;
6213             return;
6214           }
6215         } else {
6216           CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6217           if (const auto *PreInit =
6218                   cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6219             for (const auto *I : PreInit->decls()) {
6220               if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6221                 CGF.EmitVarDecl(cast<VarDecl>(*I));
6222               } else {
6223                 CodeGenFunction::AutoVarEmission Emission =
6224                     CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6225                 CGF.EmitAutoVarCleanups(Emission);
6226               }
6227             }
6228             *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6229           }
6230         }
6231       }
6232     }
    // Check the value of the num_threads clause iff the if clause was not
    // specified or does not evaluate to false.
6235     if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6236       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6237       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6238       const auto *NumThreadsClause =
6239           Dir->getSingleClause<OMPNumThreadsClause>();
6240       const Expr *NTExpr = NumThreadsClause->getNumThreads();
6241       if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6242         if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound > 0
                  ? std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()))
                  : static_cast<int32_t>(Constant->getZExtValue());
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
6250       if (UpperBound == -1)
6251         UpperBound = 0;
6252       if (!E)
6253         return;
6254       CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6255       if (const auto *PreInit =
6256               cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6257         for (const auto *I : PreInit->decls()) {
6258           if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6259             CGF.EmitVarDecl(cast<VarDecl>(*I));
6260           } else {
6261             CodeGenFunction::AutoVarEmission Emission =
6262                 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6263             CGF.EmitAutoVarCleanups(Emission);
6264           }
6265         }
6266       }
6267       *E = NTExpr;
6268     }
6269     return;
6270   }
6271   if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6272     UpperBound = 1;
6273 }
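
// E.g. for the nested construct (sketch):
//
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(4)
//   { ... }
//
// UpperBound becomes 4 and, when CondVal is requested, it receives the
// emitted boolean value of 'c'; if 'c' folds to false, UpperBound collapses
// to 1 instead.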
6274 
6275 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6276     CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6277     bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6278   assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6279          "Clauses associated with the teams directive expected to be emitted "
6280          "only for the host!");
6281   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6282   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6283          "Expected target-based executable directive.");
6284 
6285   const Expr *NT = nullptr;
6286   const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6287 
6288   auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6289     if (E->isIntegerConstantExpr(CGF.getContext())) {
6290       if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound > 0
                         ? std::min(UpperBound,
                                    int32_t(Constant->getZExtValue()))
                         : int32_t(Constant->getZExtValue());
6294     }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
6297     if (UpperBound == -1)
6298       UpperBound = 0;
6299     if (EPtr)
6300       *EPtr = E;
6301   };
6302 
6303   auto ReturnSequential = [&]() {
6304     UpperBound = 1;
6305     return NT;
6306   };
6307 
6308   switch (DirectiveKind) {
6309   case OMPD_target: {
6310     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6311     getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6312     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6313         CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear on how to resolve two thread_limit
    //       clauses; pick the teams one if it's present, otherwise the
    //       target one.
6316     const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6317     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6318       if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6319         ThreadLimitClause = TLC;
6320         if (ThreadLimitExpr) {
6321           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6322           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6323           CodeGenFunction::LexicalScope Scope(
6324               CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6325           if (const auto *PreInit =
6326                   cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6327             for (const auto *I : PreInit->decls()) {
6328               if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6329                 CGF.EmitVarDecl(cast<VarDecl>(*I));
6330               } else {
6331                 CodeGenFunction::AutoVarEmission Emission =
6332                     CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6333                 CGF.EmitAutoVarCleanups(Emission);
6334               }
6335             }
6336           }
6337         }
6338       }
6339     }
6340     if (ThreadLimitClause)
6341       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6342     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6343       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6344           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6345         CS = Dir->getInnermostCapturedStmt();
6346         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6347             CGF.getContext(), CS->getCapturedStmt());
6348         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6349       }
6350       if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6351         CS = Dir->getInnermostCapturedStmt();
6352         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6353       } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6354         return ReturnSequential();
6355     }
6356     return NT;
6357   }
6358   case OMPD_target_teams: {
6359     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6360       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6361       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6362       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6363     }
6364     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6365     getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6366     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6367         CGF.getContext(), CS->getCapturedStmt());
6368     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6369       if (Dir->getDirectiveKind() == OMPD_distribute) {
6370         CS = Dir->getInnermostCapturedStmt();
6371         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372       }
6373     }
6374     return NT;
6375   }
6376   case OMPD_target_teams_distribute:
6377     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6378       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6379       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6380       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6381     }
6382     getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6383                   UpperBoundOnly, CondVal);
6384     return NT;
6385   case OMPD_target_teams_loop:
6386   case OMPD_target_parallel_loop:
6387   case OMPD_target_parallel:
6388   case OMPD_target_parallel_for:
6389   case OMPD_target_parallel_for_simd:
6390   case OMPD_target_teams_distribute_parallel_for:
6391   case OMPD_target_teams_distribute_parallel_for_simd: {
6392     if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6393       const OMPIfClause *IfClause = nullptr;
6394       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6395         if (C->getNameModifier() == OMPD_unknown ||
6396             C->getNameModifier() == OMPD_parallel) {
6397           IfClause = C;
6398           break;
6399         }
6400       }
6401       if (IfClause) {
6402         const Expr *Cond = IfClause->getCondition();
6403         bool Result;
6404         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6405           if (!Result)
6406             return ReturnSequential();
6407         } else {
6408           CodeGenFunction::RunCleanupsScope Scope(CGF);
6409           *CondVal = CGF.EvaluateExprAsBool(Cond);
6410         }
6411       }
6412     }
6413     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6414       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6415       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6416       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6417     }
6418     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6419       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6420       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6421       CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6422       return NumThreadsClause->getNumThreads();
6423     }
6424     return NT;
6425   }
6426   case OMPD_target_teams_distribute_simd:
6427   case OMPD_target_simd:
6428     return ReturnSequential();
6429   default:
6430     break;
6431   }
6432   llvm_unreachable("Unsupported directive kind.");
6433 }
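
// Representative outcomes (sketch):
//
//   #pragma omp target parallel num_threads(16) // returns the '16' expression,
//                                               // UpperBound = 16
//   #pragma omp target teams thread_limit(8)    // *ThreadLimitExpr = '8',
//                                               // UpperBound = 8
//   #pragma omp target simd                     // sequential: UpperBound = 1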
6434 
6435 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6436     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6437   llvm::Value *NumThreadsVal = nullptr;
6438   llvm::Value *CondVal = nullptr;
6439   llvm::Value *ThreadLimitVal = nullptr;
6440   const Expr *ThreadLimitExpr = nullptr;
6441   int32_t UpperBound = -1;
6442 
6443   const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /*UpperBoundOnly=*/false, &CondVal,
6445       &ThreadLimitExpr);
6446 
6447   // Thread limit expressions are used below, emit them.
6448   if (ThreadLimitExpr) {
6449     ThreadLimitVal =
6450         CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6451     ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6452                                                /*isSigned=*/false);
6453   }
6454 
  // Generate the num threads expression.
6456   if (UpperBound == 1) {
6457     NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6458   } else if (NT) {
6459     NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6460     NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6461                                               /*isSigned=*/false);
6462   } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression is now handled.
6465     NumThreadsVal = ThreadLimitVal;
6466     ThreadLimitVal = nullptr;
6467   } else {
6468     // Default to "0" which means runtime choice.
6469     assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6470     NumThreadsVal = CGF.Builder.getInt32(0);
6471   }
6472 
  // Handle the if clause. If the if clause is present, the number of threads
  // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6475   if (CondVal) {
6476     CodeGenFunction::RunCleanupsScope Scope(CGF);
6477     NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6478                                              CGF.Builder.getInt32(1));
6479   }
6480 
  // If both the thread limit and the num threads expression were present,
  // take the minimum.
6483   if (ThreadLimitVal) {
6484     NumThreadsVal = CGF.Builder.CreateSelect(
6485         CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6486         ThreadLimitVal, NumThreadsVal);
6487   }
6488 
6489   return NumThreadsVal;
6490 }
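
// The returned value thus reduces to the following shape (pseudo-IR sketch):
//
//   %nt = <num_threads expr>              ; or the thread limit, or i32 0
//   %nt = select i1 %cond, i32 %nt, i32 1 ; only if an if clause is present
//   %lt = icmp ult i32 %tl, %nt
//   %nt = select i1 %lt, i32 %tl, i32 %nt ; only if a thread limit remains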
6491 
6492 namespace {
6493 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6494 
6495 // Utility to handle information from clauses associated with a given
6496 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6497 // It provides a convenient interface to obtain the information and generate
6498 // code for that information.
6499 class MappableExprsHandler {
6500 public:
6501   /// Get the offset of the OMP_MAP_MEMBER_OF field.
6502   static unsigned getFlagMemberOffset() {
6503     unsigned Offset = 0;
6504     for (uint64_t Remain =
6505              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6506                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6507          !(Remain & 1); Remain = Remain >> 1)
6508       Offset++;
6509     return Offset;
6510   }
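
  // For instance (sketch), with the usual encoding where OMP_MAP_MEMBER_OF
  // occupies the 16 most significant bits of a 64-bit flag word
  // (0xFFFF000000000000), the loop counts 48 trailing zero bits and
  // getFlagMemberOffset() returns 48.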
6511 
6512   /// Class that holds debugging information for a data mapping to be passed to
6513   /// the runtime library.
6514   class MappingExprInfo {
6515     /// The variable declaration used for the data mapping.
6516     const ValueDecl *MapDecl = nullptr;
6517     /// The original expression used in the map clause, or null if there is
6518     /// none.
6519     const Expr *MapExpr = nullptr;
6520 
6521   public:
6522     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6523         : MapDecl(MapDecl), MapExpr(MapExpr) {}
6524 
6525     const ValueDecl *getMapDecl() const { return MapDecl; }
6526     const Expr *getMapExpr() const { return MapExpr; }
6527   };
6528 
6529   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6530   using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6531   using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6532   using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6533   using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6534   using MapNonContiguousArrayTy =
6535       llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6536   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6537   using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6538 
6539   /// This structure contains combined information generated for mappable
6540   /// clauses, including base pointers, pointers, sizes, map types, user-defined
6541   /// mappers, and non-contiguous information.
6542   struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6543     MapExprsArrayTy Exprs;
6544     MapValueDeclsArrayTy Mappers;
6545     MapValueDeclsArrayTy DevicePtrDecls;
6546 
6547     /// Append arrays in \a CurInfo.
6548     void append(MapCombinedInfoTy &CurInfo) {
6549       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6550       DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6551                             CurInfo.DevicePtrDecls.end());
6552       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6553       llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6554     }
6555   };
6556 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
6559   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6560   ///                    HE(FieldIndex, Pointer)}
6561   struct StructRangeInfoTy {
6562     MapCombinedInfoTy PreliminaryMapData;
6563     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6564         0, Address::invalid()};
6565     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6566         0, Address::invalid()};
6567     Address Base = Address::invalid();
6568     Address LB = Address::invalid();
6569     bool IsArraySection = false;
6570     bool HasCompleteRecord = false;
6571   };
6572 
6573 private:
  /// Information attached to a component list of a mappable expression,
  /// including whether a device pointer has to be returned for it.
6575   struct MapInfo {
6576     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6577     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6578     ArrayRef<OpenMPMapModifierKind> MapModifiers;
6579     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6580     bool ReturnDevicePointer = false;
6581     bool IsImplicit = false;
6582     const ValueDecl *Mapper = nullptr;
6583     const Expr *VarRef = nullptr;
6584     bool ForDeviceAddr = false;
6585 
6586     MapInfo() = default;
6587     MapInfo(
6588         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6589         OpenMPMapClauseKind MapType,
6590         ArrayRef<OpenMPMapModifierKind> MapModifiers,
6591         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6592         bool ReturnDevicePointer, bool IsImplicit,
6593         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6594         bool ForDeviceAddr = false)
6595         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6596           MotionModifiers(MotionModifiers),
6597           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6598           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6599   };
6600 
6601   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6602   /// member and there is no map information about it, then emission of that
6603   /// entry is deferred until the whole struct has been processed.
6604   struct DeferredDevicePtrEntryTy {
6605     const Expr *IE = nullptr;
6606     const ValueDecl *VD = nullptr;
6607     bool ForDeviceAddr = false;
6608 
6609     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6610                              bool ForDeviceAddr)
6611         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6612   };
6613 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
6616   llvm::PointerUnion<const OMPExecutableDirective *,
6617                      const OMPDeclareMapperDecl *>
6618       CurDir;
6619 
6620   /// Function the directive is being generated for.
6621   CodeGenFunction &CGF;
6622 
6623   /// Set of all first private variables in the current directive.
6624   /// bool data is set to true if the variable is implicitly marked as
6625   /// firstprivate, false otherwise.
6626   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6627 
6628   /// Map between device pointer declarations and their expression components.
6629   /// The key value for declarations in 'this' is null.
6630   llvm::DenseMap<
6631       const ValueDecl *,
6632       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6633       DevPointersMap;
6634 
6635   /// Map between device addr declarations and their expression components.
6636   /// The key value for declarations in 'this' is null.
6637   llvm::DenseMap<
6638       const ValueDecl *,
6639       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6640       HasDevAddrsMap;
6641 
6642   /// Map between lambda declarations and their map type.
6643   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6644 
6645   llvm::Value *getExprTypeSize(const Expr *E) const {
6646     QualType ExprTy = E->getType().getCanonicalType();
6647 
    // Calculate the size for an array shaping expression.
6649     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6650       llvm::Value *Size =
6651           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6652       for (const Expr *SE : OAE->getDimensions()) {
6653         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6654         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6655                                       CGF.getContext().getSizeType(),
6656                                       SE->getExprLoc());
6657         Size = CGF.Builder.CreateNUWMul(Size, Sz);
6658       }
6659       return Size;
6660     }
6661 
6662     // Reference types are ignored for mapping purposes.
6663     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6664       ExprTy = RefTy->getPointeeType().getCanonicalType();
6665 
6666     // Given that an array section is considered a built-in type, we need to
6667     // do the calculation based on the length of the section instead of relying
6668     // on CGF.getTypeSize(E->getType()).
6669     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6670       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6671                             OAE->getBase()->IgnoreParenImpCasts())
6672                             .getCanonicalType();
6673 
      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
6677       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6678           !OAE->getLowerBound())
6679         return CGF.getTypeSize(BaseTy);
6680 
6681       llvm::Value *ElemSize;
6682       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6683         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6684       } else {
6685         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6686         assert(ATy && "Expecting array type if not a pointer type.");
6687         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6688       }
6689 
6690       // If we don't have a length at this point, that is because we have an
6691       // array section with a single element.
6692       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6693         return ElemSize;
6694 
6695       if (const Expr *LenExpr = OAE->getLength()) {
6696         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6697         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6698                                              CGF.getContext().getSizeType(),
6699                                              LenExpr->getExprLoc());
6700         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6701       }
6702       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6703              OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem);
6705       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6706       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6707       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6708                                        CGF.getContext().getSizeType(),
6709                                        OAE->getLowerBound()->getExprLoc());
6710       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6711       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6712       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6713       LengthVal = CGF.Builder.CreateSelect(
6714           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6715       return LengthVal;
6716     }
6717     return CGF.getTypeSize(ExprTy);
6718   }
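
  // E.g. (sketch): for 'int a[100]', the section a[lb:len] yields
  // len * sizeof(int), while a[lb:] yields
  // 100 * sizeof(int) - lb * sizeof(int), clamped to 0 by the select above.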
6719 
6720   /// Return the corresponding bits for a given map clause modifier. Add
6721   /// a flag marking the map as a pointer if requested. Add a flag marking the
6722   /// map as the first one of a series of maps that relate to the same map
6723   /// expression.
6724   OpenMPOffloadMappingFlags getMapTypeBits(
6725       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6726       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6727       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6728     OpenMPOffloadMappingFlags Bits =
6729         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6730                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6731     switch (MapType) {
6732     case OMPC_MAP_alloc:
6733     case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // map types.
6738       break;
6739     case OMPC_MAP_to:
6740       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6741       break;
6742     case OMPC_MAP_from:
6743       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6744       break;
6745     case OMPC_MAP_tofrom:
6746       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6747               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6748       break;
6749     case OMPC_MAP_delete:
6750       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6751       break;
6752     case OMPC_MAP_unknown:
6753       llvm_unreachable("Unexpected map type!");
6754     }
6755     if (AddPtrFlag)
6756       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6757     if (AddIsTargetParamFlag)
6758       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6759     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6760       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6761     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6762       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6763     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6764         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6765       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6766     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6767       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6768     if (IsNonContiguous)
6769       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6770     return Bits;
6771   }
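
  // For instance (sketch), 'map(always, close, tofrom: x)' on a first,
  // target-parameter component yields:
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE |
  //   OMP_MAP_TARGET_PARAM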
6772 
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
6775   bool isFinalArraySectionExpression(const Expr *E) const {
6776     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6777 
6778     // It is not an array section and therefore not a unity-size one.
6779     if (!OASE)
6780       return false;
6781 
    // An array section with no colon always refers to a single element.
6783     if (OASE->getColonLocFirst().isInvalid())
6784       return false;
6785 
6786     const Expr *Length = OASE->getLength();
6787 
    // If we don't have a length, we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
6791     if (!Length) {
6792       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6793                              OASE->getBase()->IgnoreParenImpCasts())
6794                              .getCanonicalType();
6795       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6796         return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
6800       return true;
6801     }
6802 
6803     // Check if the length evaluates to 1.
6804     Expr::EvalResult Result;
6805     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.
6807 
6808     llvm::APSInt ConstLength = Result.Val.getInt();
6809     return ConstLength.getSExtValue() != 1;
6810   }
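
  // Examples (sketch), given 'int a[8]; int *p;':
  //   a[2:1] -> false (constant length 1)
  //   a[2:]  -> true  (remaining length is 6, not provably 1)
  //   p[0:n] -> true  (length is not a compile-time constant)
  //   a[3]   -> false (not an array section at all)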
6811 
6812   /// Generate the base pointers, section pointers, sizes, map type bits, and
6813   /// user-defined mappers (all included in \a CombinedInfo) for the provided
6814   /// map type, map or motion modifiers, and expression components.
6815   /// \a IsFirstComponent should be set to true if the provided set of
6816   /// components is the first associated with a capture.
6817   void generateInfoForComponentList(
6818       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6819       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6820       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6821       MapCombinedInfoTy &CombinedInfo,
6822       MapCombinedInfoTy &StructBaseCombinedInfo,
6823       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6824       bool IsImplicit, bool GenerateAllInfoForClauses,
6825       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6826       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6827       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6828           OverlappedElements = std::nullopt) const {
6829     // The following summarizes what has to be generated for each map and the
6830     // types below. The generated information is expressed in this order:
6831     // base pointer, section pointer, size, flags
6832     // (to add to the ones that come from the map type and modifier).
6833     //
6834     // double d;
6835     // int i[100];
6836     // float *p;
6837     // int **a = &i;
6838     //
6839     // struct S1 {
6840     //   int i;
6841     //   float f[50];
6842     // }
6843     // struct S2 {
6844     //   int i;
6845     //   float f[50];
6846     //   S1 s;
6847     //   double *p;
6848     //   struct S2 *ps;
6849     //   int &ref;
6850     // }
6851     // S2 s;
6852     // S2 *ps;
6853     //
6854     // map(d)
6855     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6856     //
6857     // map(i)
6858     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6859     //
6860     // map(i[1:23])
6861     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6862     //
6863     // map(p)
6864     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6865     //
6866     // map(p[1:24])
6867     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6868     // in unified shared memory mode or for local pointers
6869     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6870     //
6871     // map((*a)[0:3])
6872     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6873     // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6874     //
6875     // map(**a)
6876     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6877     // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6878     //
6879     // map(s)
6880     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6881     //
6882     // map(s.i)
6883     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6884     //
6885     // map(s.s.f)
6886     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6887     //
6888     // map(s.p)
6889     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6890     //
6891     // map(to: s.p[:22])
6892     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6893     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6894     // &(s.p), &(s.p[0]), 22*sizeof(double),
6895     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6896     // (*) alloc space for struct members, only this is a target parameter
6897     // (**) map the pointer (nothing to be mapped in this example) (the compiler
6898     //      optimizes this entry out, same in the examples below)
6899     // (***) map the pointee (map: to)
6900     //
6901     // map(to: s.ref)
6902     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6903     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6904     // (*) alloc space for struct members, only this is a target parameter
6905     // (**) map the pointer (nothing to be mapped in this example) (the compiler
6906     //      optimizes this entry out, same in the examples below)
6907     // (***) map the pointee (map: to)
6908     //
6909     // map(s.ps)
6910     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6911     //
6912     // map(from: s.ps->s.i)
6913     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6914     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6915     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
6916     //
6917     // map(to: s.ps->ps)
6918     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6919     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6920     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
6921     //
6922     // map(s.ps->ps->ps)
6923     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6924     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6925     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6926     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6927     //
6928     // map(to: s.ps->ps->s.f[:22])
6929     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6930     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6931     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6932     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6933     //
6934     // map(ps)
6935     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6936     //
6937     // map(ps->i)
6938     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6939     //
6940     // map(ps->s.f)
6941     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6942     //
6943     // map(from: ps->p)
6944     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6945     //
6946     // map(to: ps->p[:22])
6947     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6948     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6949     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6950     //
6951     // map(ps->ps)
6952     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6953     //
6954     // map(from: ps->ps->s.i)
6955     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6956     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6957     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6958     //
6959     // map(from: ps->ps->ps)
6960     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6961     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6962     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6963     //
6964     // map(ps->ps->ps->ps)
6965     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6966     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6967     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6968     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6969     //
6970     // map(to: ps->ps->ps->s.f[:22])
6971     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6972     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6973     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6974     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6975     //
6976     // map(to: s.f[:22]) map(from: s.p[:33])
6977     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
6979     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6980     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6981     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6982     // (*) allocate contiguous space needed to fit all mapped members even if
6983     //     we allocate space for members not mapped (in this example,
6984     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
6985     //     them as well because they fall between &s.f[0] and &s.p)
6986     //
6987     // map(from: s.f[:22]) map(to: ps->p[:33])
6988     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6989     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6990     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6991     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6992     // (*) the struct this entry pertains to is the 2nd element in the list of
6993     //     arguments, hence MEMBER_OF(2)
6994     //
6995     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6996     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6997     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
6998     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
6999     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7000     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7001     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7002     // (*) the struct this entry pertains to is the 4th element in the list
7003     //     of arguments, hence MEMBER_OF(4)
7004 
7005     // Track if the map information being generated is the first for a capture.
7006     bool IsCaptureFirstInfo = IsFirstComponentList;
7007     // When the variable is on a declare target link or in a to clause with
7008     // unified memory, a reference is needed to hold the host/device address
7009     // of the variable.
7010     bool RequiresReference = false;
7011 
7012     // Scan the components from the base to the complete expression.
7013     auto CI = Components.rbegin();
7014     auto CE = Components.rend();
7015     auto I = CI;
7016 
7017     // Track if the map information being generated is the first for a list of
7018     // components.
7019     bool IsExpressionFirstInfo = true;
7020     bool FirstPointerInComplexData = false;
7021     Address BP = Address::invalid();
7022     const Expr *AssocExpr = I->getAssociatedExpression();
7023     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7024     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7025     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7026 
7027     if (isa<MemberExpr>(AssocExpr)) {
7028       // The base is the 'this' pointer. The content of the pointer is going
7029       // to be the base of the field being mapped.
7030       BP = CGF.LoadCXXThisAddress();
7031     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7032                (OASE &&
7033                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7034       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7035     } else if (OAShE &&
7036                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7037       BP = Address(
7038           CGF.EmitScalarExpr(OAShE->getBase()),
7039           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7040           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7041     } else {
7042       // The base is the reference to the variable.
7043       // BP = &Var.
7044       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7045       if (const auto *VD =
7046               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7047         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7048                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7049           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7050               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7051                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7052                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7053             RequiresReference = true;
7054             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7055           }
7056         }
7057       }
7058 
7059       // If the variable is a pointer and is being dereferenced (i.e. is not
7060       // the last component), the base has to be the pointer itself, not its
7061       // reference. References are ignored for mapping purposes.
7062       QualType Ty =
7063           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7064       if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
7068         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7069         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7070             !VD || VD->hasLocalStorage())
7071           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7072         else
7073           FirstPointerInComplexData = true;
7074         ++I;
7075       }
7076     }
7077 
7078     // Track whether a component of the list should be marked as MEMBER_OF some
7079     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
7082     // struct S2 s;
7083     // s.ps->ps->ps->f[:]
7084     //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2), which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
7089     // The variable is initialized to false so that PTR_AND_OBJ entries which
7090     // are not struct members are not considered (e.g. array of pointers to
7091     // data).
7092     bool ShouldBeMemberOf = false;
7093 
7094     // Variable keeping track of whether or not we have encountered a component
7095     // in the component list which is a member expression. Useful when we have a
7096     // pointer or a final array section, in which case it is the previous
7097     // component in the list which tells us whether we have a member expression.
7098     // E.g. X.f[:]
7099     // While processing the final array section "[:]" it is "f" which tells us
7100     // whether we are dealing with a member of a declared struct.
7101     const MemberExpr *EncounteredME = nullptr;
7102 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7105     uint64_t DimSize = 1;
7106 
7107     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7108     bool IsPrevMemberReference = false;
7109 
    // We need to check if we will be encountering any member expressions
    // (MEs). If we do not encounter any, it means we will be mapping the
    // whole struct.
7112     // In that case we need to skip adding an entry for the struct to the
7113     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7114     // list only when generating all info for clauses.
7115     bool IsMappingWholeStruct = true;
7116     if (!GenerateAllInfoForClauses) {
7117       IsMappingWholeStruct = false;
7118     } else {
7119       for (auto TempI = I; TempI != CE; ++TempI) {
7120         const MemberExpr *PossibleME =
7121             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7122         if (PossibleME) {
7123           IsMappingWholeStruct = false;
7124           break;
7125         }
7126       }
7127     }
7128 
7129     for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
7131       if (!EncounteredME) {
7132         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7133         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7134         // as MEMBER_OF the parent struct.
7135         if (EncounteredME) {
7136           ShouldBeMemberOf = true;
7137           // Do not emit as a complex pointer if this is actually not an
7138           // array-like expression.
7139           if (FirstPointerInComplexData) {
7140             QualType Ty = std::prev(I)
7141                               ->getAssociatedDeclaration()
7142                               ->getType()
7143                               .getNonReferenceType();
7144             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7145             FirstPointerInComplexData = false;
7146           }
7147         }
7148       }
7149 
7150       auto Next = std::next(I);
7151 
7152       // We need to generate the addresses and sizes if this is the last
7153       // component, if the component is a pointer or if it is an array section
7154       // whose length can't be proved to be one. If this is a pointer, it
7155       // becomes the base address for the following components.
7156 
7157       // A final array section is one whose length can't be proved to be one.
7158       // If the map item is non-contiguous then we don't treat any array section
7159       // as a final array section.
7160       bool IsFinalArraySection =
7161           !IsNonContiguous &&
7162           isFinalArraySectionExpression(I->getAssociatedExpression());
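      // E.g. (illustrative): for `int a[10]`, the section a[2:1] has a length
      // that is provably one and so is not a final array section, whereas
      // a[2:n] is, and addresses and sizes must be emitted for it.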
7163 
7164       // If we have a declaration for the mapping use that, otherwise use
7165       // the base declaration of the map clause.
7166       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7167                                      ? I->getAssociatedDeclaration()
7168                                      : BaseDecl;
7169       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7170                                                : MapExpr;
7171 
7172       // Get information on whether the element is a pointer. We have to
7173       // treat array sections specially given that they are built-in
7174       // types.
7175       const auto *OASE =
7176           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7177       const auto *OAShE =
7178           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7179       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7180       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7181       bool IsPointer =
7182           OAShE ||
7183           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7184                        .getCanonicalType()
7185                        ->isAnyPointerType()) ||
7186           I->getAssociatedExpression()->getType()->isAnyPointerType();
7187       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7188                                MapDecl &&
7189                                MapDecl->getType()->isLValueReferenceType();
7190       bool IsNonDerefPointer = IsPointer &&
7191                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7192                                !IsNonContiguous;
7193 
7194       if (OASE)
7195         ++DimSize;
7196 
7197       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7198           IsFinalArraySection) {
7199         // If this is not the last component, we expect the pointer to be
7200         // associated with an array expression or member expression.
7201         assert((Next == CE ||
7202                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7203                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7204                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7205                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7206                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7207                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7208                "Unexpected expression");
7209 
7210         Address LB = Address::invalid();
7211         Address LowestElem = Address::invalid();
7212         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7213                                        const MemberExpr *E) {
7214           const Expr *BaseExpr = E->getBase();
7215           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7216           // scalar.
7217           LValue BaseLV;
7218           if (E->isArrow()) {
7219             LValueBaseInfo BaseInfo;
7220             TBAAAccessInfo TBAAInfo;
7221             Address Addr =
7222                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7223             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7224             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7225           } else {
7226             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7227           }
7228           return BaseLV;
7229         };
7230         if (OAShE) {
7231           LowestElem = LB =
7232               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7233                       CGF.ConvertTypeForMem(
7234                           OAShE->getBase()->getType()->getPointeeType()),
7235                       CGF.getContext().getTypeAlignInChars(
7236                           OAShE->getBase()->getType()));
7237         } else if (IsMemberReference) {
7238           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7239           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7240           LowestElem = CGF.EmitLValueForFieldInitialization(
7241                               BaseLVal, cast<FieldDecl>(MapDecl))
7242                            .getAddress(CGF);
7243           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7244                    .getAddress(CGF);
7245         } else {
7246           LowestElem = LB =
7247               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7248                   .getAddress(CGF);
7249         }
7250 
7251         // If this component is a pointer inside the base struct then we don't
7252         // need to create any entry for it; it will be combined with the object
7253         // it points to into a single PTR_AND_OBJ entry.
7254         bool IsMemberPointerOrAddr =
7255             EncounteredME &&
7256             (((IsPointer || ForDeviceAddr) &&
7257               I->getAssociatedExpression() == EncounteredME) ||
7258              (IsPrevMemberReference && !IsPointer) ||
7259              (IsMemberReference && Next != CE &&
7260               !Next->getAssociatedExpression()->getType()->isPointerType()));
7261         if (!OverlappedElements.empty() && Next == CE) {
7262           // Handle base element with the info for overlapped elements.
7263           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7264           assert(!IsPointer &&
7265                  "Unexpected base element with the pointer type.");
7266           // Mark the whole struct as the struct that requires allocation on the
7267           // device.
7268           PartialStruct.LowestElem = {0, LowestElem};
7269           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7270               I->getAssociatedExpression()->getType());
7271           Address HB = CGF.Builder.CreateConstGEP(
7272               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7273                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7274               TypeSize.getQuantity() - 1);
7275           PartialStruct.HighestElem = {
7276               std::numeric_limits<decltype(
7277                   PartialStruct.HighestElem.first)>::max(),
7278               HB};
7279           PartialStruct.Base = BP;
7280           PartialStruct.LB = LB;
7281           assert(
7282               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7283               "Overlapped elements must be used only once for the variable.");
7284           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7285           // Emit entries for the non-overlapped data.
7286           OpenMPOffloadMappingFlags Flags =
7287               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7288               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7289                              /*AddPtrFlag=*/false,
7290                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7291           llvm::Value *Size = nullptr;
7292           // Do a bitcopy of all non-overlapped structure elements.
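          // Illustrative sketch of the bitcopy (assumed scenario): given
          //   struct S { int a; int *p; int b; } s;
          // where the member 'p' is an overlapped element (e.g. also mapped
          // as s.p[:N]), the loop below emits one entry per non-overlapped
          // region, here [&s.a, &s.p), and the code after the loop emits the
          // trailing region [&s.b, one past the highest element).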
7293           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7294                    Component : OverlappedElements) {
7295             Address ComponentLB = Address::invalid();
7296             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7297                  Component) {
7298               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7299                 const auto *FD = dyn_cast<FieldDecl>(VD);
7300                 if (FD && FD->getType()->isLValueReferenceType()) {
7301                   const auto *ME =
7302                       cast<MemberExpr>(MC.getAssociatedExpression());
7303                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7304                   ComponentLB =
7305                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7306                           .getAddress(CGF);
7307                 } else {
7308                   ComponentLB =
7309                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7310                           .getAddress(CGF);
7311                 }
7312                 Size = CGF.Builder.CreatePtrDiff(
7313                     CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7314                 break;
7315               }
7316             }
7317             assert(Size && "Failed to determine structure size");
7318             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7319             CombinedInfo.BasePointers.push_back(BP.getPointer());
7320             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7321             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7322             CombinedInfo.Pointers.push_back(LB.getPointer());
7323             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7324                 Size, CGF.Int64Ty, /*isSigned=*/true));
7325             CombinedInfo.Types.push_back(Flags);
7326             CombinedInfo.Mappers.push_back(nullptr);
7327             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7328                                                                       : 1);
7329             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7330           }
7331           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7332           CombinedInfo.BasePointers.push_back(BP.getPointer());
7333           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7334           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7335           CombinedInfo.Pointers.push_back(LB.getPointer());
7336           Size = CGF.Builder.CreatePtrDiff(
7337               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7338               LB.getPointer());
7339           CombinedInfo.Sizes.push_back(
7340               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7341           CombinedInfo.Types.push_back(Flags);
7342           CombinedInfo.Mappers.push_back(nullptr);
7343           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7344                                                                     : 1);
7345           break;
7346         }
7347         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7348         // Skip adding an entry in the CurInfo of this combined entry if the
7349         // whole struct is currently being mapped. The struct needs to be added
7350         // in the first position before any data internal to the struct is
7351         // mapped.
7352         if (!IsMemberPointerOrAddr ||
7353             (Next == CE && MapType != OMPC_MAP_unknown)) {
7354           if (!IsMappingWholeStruct) {
7355             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7356             CombinedInfo.BasePointers.push_back(BP.getPointer());
7357             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7358             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7359             CombinedInfo.Pointers.push_back(LB.getPointer());
7360             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7361                 Size, CGF.Int64Ty, /*isSigned=*/true));
7362             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7363                                                                       : 1);
7364           } else {
7365             StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7366             StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
7367             StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7368             StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7369             StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
7370             StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7371                 Size, CGF.Int64Ty, /*isSigned=*/true));
7372             StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7373                 IsNonContiguous ? DimSize : 1);
7374           }
7375 
7376           // If Mapper is valid, the last component inherits the mapper.
7377           bool HasMapper = Mapper && Next == CE;
7378           if (!IsMappingWholeStruct)
7379             CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7380           else
7381             StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7382                                                                : nullptr);
7383 
7384           // We need to add a pointer flag for each map that comes from the
7385           // same expression except for the first one. We also need to signal
7386           // that this map is the first one that relates to the current capture
7387           // (there is a set of entries for each capture).
7388           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7389               MapType, MapModifiers, MotionModifiers, IsImplicit,
7390               !IsExpressionFirstInfo || RequiresReference ||
7391                   FirstPointerInComplexData || IsMemberReference,
7392               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7393 
7394           if (!IsExpressionFirstInfo || IsMemberReference) {
7395             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7396             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7397             if (IsPointer || (IsMemberReference && Next != CE))
7398               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7399                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7400                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7401                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7402                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7403 
7404             if (ShouldBeMemberOf) {
7405               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7406               // should be later updated with the correct value of MEMBER_OF.
7407               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7408               // From now on, all subsequent PTR_AND_OBJ entries should not be
7409               // marked as MEMBER_OF.
7410               ShouldBeMemberOf = false;
7411             }
7412           }
7413 
7414           if (!IsMappingWholeStruct)
7415             CombinedInfo.Types.push_back(Flags);
7416           else
7417             StructBaseCombinedInfo.Types.push_back(Flags);
7418         }
7419 
7420         // If we have encountered a member expression so far, keep track of the
7421         // mapped member. If the parent is "*this", then the value declaration
7422         // is nullptr.
7423         if (EncounteredME) {
7424           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7425           unsigned FieldIndex = FD->getFieldIndex();
7426 
7427           // Update info about the lowest and highest elements for this struct.
7428           if (!PartialStruct.Base.isValid()) {
7429             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7430             if (IsFinalArraySection) {
7431               Address HB =
7432                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7433                       .getAddress(CGF);
7434               PartialStruct.HighestElem = {FieldIndex, HB};
7435             } else {
7436               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7437             }
7438             PartialStruct.Base = BP;
7439             PartialStruct.LB = BP;
7440           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7441             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7442           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7443             if (IsFinalArraySection) {
7444               Address HB =
7445                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7446                       .getAddress(CGF);
7447               PartialStruct.HighestElem = {FieldIndex, HB};
7448             } else {
7449               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7450             }
7451           }
7452         }
7453 
7454         // Need to emit combined struct for array sections.
7455         if (IsFinalArraySection || IsNonContiguous)
7456           PartialStruct.IsArraySection = true;
7457 
7458         // If we have a final array section, we are done with this expression.
7459         if (IsFinalArraySection)
7460           break;
7461 
7462         // The pointer becomes the base for the next element.
7463         if (Next != CE)
7464           BP = IsMemberReference ? LowestElem : LB;
7465 
7466         IsExpressionFirstInfo = false;
7467         IsCaptureFirstInfo = false;
7468         FirstPointerInComplexData = false;
7469         IsPrevMemberReference = IsMemberReference;
7470       } else if (FirstPointerInComplexData) {
7471         QualType Ty = Components.rbegin()
7472                           ->getAssociatedDeclaration()
7473                           ->getType()
7474                           .getNonReferenceType();
7475         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7476         FirstPointerInComplexData = false;
7477       }
7478     }
7479     // If we ran into the whole component, allocate space for the whole
7480     // record.
7481     if (!EncounteredME)
7482       PartialStruct.HasCompleteRecord = true;
7483 
7484     if (!IsNonContiguous)
7485       return;
7486 
7487     const ASTContext &Context = CGF.getContext();
7488 
7489     // For supporting strides in array sections, we need to initialize the first
7490     // dimension size as 1, the first offset as 0, and the first count as 1.
7491     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7492     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7493     MapValuesArrayTy CurStrides;
7494     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7495     uint64_t ElementTypeSize;
7496 
7497     // Collect size information for each dimension and get the element size as
7498     // the first stride. For example, for `int arr[10][10]`, the DimSizes
7499     // should be [10, 10] and the first stride is 4 bytes.
7500     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7501          Components) {
7502       const Expr *AssocExpr = Component.getAssociatedExpression();
7503       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7504 
7505       if (!OASE)
7506         continue;
7507 
7508       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7509       auto *CAT = Context.getAsConstantArrayType(Ty);
7510       auto *VAT = Context.getAsVariableArrayType(Ty);
7511 
7512       // We need all the dimension sizes except for the last dimension.
7513       assert((VAT || CAT || &Component == &*Components.begin()) &&
7514              "Should be either ConstantArray or VariableArray if not the "
7515              "first Component");
7516 
7517       // Get element size if CurStrides is empty.
7518       if (CurStrides.empty()) {
7519         const Type *ElementType = nullptr;
7520         if (CAT)
7521           ElementType = CAT->getElementType().getTypePtr();
7522         else if (VAT)
7523           ElementType = VAT->getElementType().getTypePtr();
7524         else
7525           assert(&Component == &*Components.begin() &&
7526                  "Only expect pointer (non CAT or VAT) when this is the "
7527                  "first Component");
7528         // If ElementType is null, then it means the base is a pointer
7529         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7530         // in the next iteration.
7531         if (ElementType) {
7532           // For the case of having a pointer as the base, we need to remove
7533           // one level of indirection.
7534           if (&Component != &*Components.begin())
7535             ElementType = ElementType->getPointeeOrArrayElementType();
7536           ElementTypeSize =
7537               Context.getTypeSizeInChars(ElementType).getQuantity();
7538           CurStrides.push_back(
7539               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7540         }
7541       }
7542       // Get the dimension value, except for the last dimension since we don't
7543       // need it.
7544       if (DimSizes.size() < Components.size() - 1) {
7545         if (CAT)
7546           DimSizes.push_back(llvm::ConstantInt::get(
7547               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7548         else if (VAT)
7549           DimSizes.push_back(CGF.Builder.CreateIntCast(
7550               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7551               /*IsSigned=*/false));
7552       }
7553     }
7554 
7555     // Skip the dummy dimension since we already have its information.
7556     auto *DI = DimSizes.begin() + 1;
7557     // Running product of the dimension sizes.
7558     llvm::Value *DimProd =
7559         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7560 
7561     // Collect info for non-contiguous maps. Notice that offset, count, and
7562     // stride are only meaningful for array sections, so we insert a null for
7563     // anything other than an array section.
7564     // Also, the sizes of the offsets, counts, and strides lists are not the
7565     // same as those of pointers, base_pointers, sizes, or dims; instead, they
7566     // match the number of non-contiguous declarations in the target update
7567     // to/from clause.
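    // E.g. (illustrative): `target update to(arr[0:2:2][1:2])` contributes
    // one offsets vector, one counts vector, and one strides vector here,
    // each with one element per dimension (including the dummy dimension).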
7568     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7569          Components) {
7570       const Expr *AssocExpr = Component.getAssociatedExpression();
7571 
7572       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7573         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7574             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7575             /*isSigned=*/false);
7576         CurOffsets.push_back(Offset);
7577         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7578         CurStrides.push_back(CurStrides.back());
7579         continue;
7580       }
7581 
7582       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7583 
7584       if (!OASE)
7585         continue;
7586 
7587       // Offset
7588       const Expr *OffsetExpr = OASE->getLowerBound();
7589       llvm::Value *Offset = nullptr;
7590       if (!OffsetExpr) {
7591         // If offset is absent, then we just set it to zero.
7592         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7593       } else {
7594         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7595                                            CGF.Int64Ty,
7596                                            /*isSigned=*/false);
7597       }
7598       CurOffsets.push_back(Offset);
7599 
7600       // Count
7601       const Expr *CountExpr = OASE->getLength();
7602       llvm::Value *Count = nullptr;
7603       if (!CountExpr) {
7604         // In Clang, once a higher dimension is an array section, we construct
7605         // all the lower dimensions as array sections. However, for a case like
7606         // arr[0:2][2], Clang constructs the inner dimension as an array section
7607         // but it actually is not in array-section form according to the spec.
7608         if (!OASE->getColonLocFirst().isValid() &&
7609             !OASE->getColonLocSecond().isValid()) {
7610           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7611         } else {
7612           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7613           // When the length is absent it defaults to ⌈(size −
7614           // lower-bound)/stride⌉, where size is the size of the array
7615           // dimension.
7616           const Expr *StrideExpr = OASE->getStride();
7617           llvm::Value *Stride =
7618               StrideExpr
7619                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7620                                               CGF.Int64Ty, /*isSigned=*/false)
7621                   : nullptr;
7622           if (Stride)
7623             Count = CGF.Builder.CreateUDiv(
7624                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7625           else
7626             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7627         }
7628       } else {
7629         Count = CGF.EmitScalarExpr(CountExpr);
7630       }
7631       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7632       CurCounts.push_back(Count);
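      // Worked example (illustrative): for `int arr[5][5]` and the section
      // arr[0:2][1:], the rightmost dimension's length is absent, so
      // Count = 5 - 1 = 4; with a stride, as in arr[0:2][1::2],
      // Count = (5 - 1) / 2 = 2.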
7633 
7634       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7635       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7636       //              Offset      Count     Stride
7637       //    D0          0           1         4    (int)    <- dummy dimension
7638       //    D1          0           2         8    (2 * (1) * 4)
7639       //    D2          1           2         20   (1 * (1 * 5) * 4)
7640       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7641       const Expr *StrideExpr = OASE->getStride();
7642       llvm::Value *Stride =
7643           StrideExpr
7644               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7645                                           CGF.Int64Ty, /*isSigned=*/false)
7646               : nullptr;
7647       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7648       if (Stride)
7649         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7650       else
7651         CurStrides.push_back(DimProd);
7652       if (DI != DimSizes.end())
7653         ++DI;
7654     }
7655 
7656     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7657     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7658     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7659   }
7660 
7661   /// Return the adjusted map modifiers if the declaration a capture refers to
7662   /// appears in a first-private clause. This is expected to be used only with
7663   /// directives that start with 'target'.
7664   OpenMPOffloadMappingFlags
7665   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7666     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7667 
7668     // A firstprivate variable captured by reference will use only the
7669     // 'private ptr' and 'map to' flags. Return the right flags if the captured
7670     // declaration is known as firstprivate in this handler.
7671     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7672       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7673         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7674                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7675       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7676              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7677     }
7678     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7679     if (I != LambdasMap.end())
7680       // For map(to: lambda): use the user-specified map type.
7681       return getMapTypeBits(
7682           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7683           /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7684           /*AddPtrFlag=*/false,
7685           /*AddIsTargetParamFlag=*/false,
7686           /*isNonContiguous=*/false);
7687     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7688            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7689   }
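  // E.g. (illustrative): for `#pragma omp target firstprivate(p)` where p is
  // an int *, the capture is mapped with 'to | ptr_and_obj'; a non-pointer
  // firstprivate capture gets 'private | to'; captures not known here as
  // firstprivate or lambdas fall back to 'to | from'.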
7690 
7691   void getPlainLayout(const CXXRecordDecl *RD,
7692                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7693                       bool AsBase) const {
7694     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7695 
7696     llvm::StructType *St =
7697         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7698 
7699     unsigned NumElements = St->getNumElements();
7700     llvm::SmallVector<
7701         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7702         RecordLayout(NumElements);
7703 
7704     // Fill bases.
7705     for (const auto &I : RD->bases()) {
7706       if (I.isVirtual())
7707         continue;
7708       const auto *Base = I.getType()->getAsCXXRecordDecl();
7709       // Ignore empty bases.
7710       if (Base->isEmpty() || CGF.getContext()
7711                                  .getASTRecordLayout(Base)
7712                                  .getNonVirtualSize()
7713                                  .isZero())
7714         continue;
7715 
7716       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7717       RecordLayout[FieldIndex] = Base;
7718     }
7719     // Fill in virtual bases.
7720     for (const auto &I : RD->vbases()) {
7721       const auto *Base = I.getType()->getAsCXXRecordDecl();
7722       // Ignore empty bases.
7723       if (Base->isEmpty())
7724         continue;
7725       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7726       if (RecordLayout[FieldIndex])
7727         continue;
7728       RecordLayout[FieldIndex] = Base;
7729     }
7730     // Fill in all the fields.
7731     assert(!RD->isUnion() && "Unexpected union.");
7732     for (const auto *Field : RD->fields()) {
7733       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7734       // will fill in later.)
7735       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7736         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7737         RecordLayout[FieldIndex] = Field;
7738       }
7739     }
7740     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7741              &Data : RecordLayout) {
7742       if (Data.isNull())
7743         continue;
7744       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7745         getPlainLayout(Base, Layout, /*AsBase=*/true);
7746       else
7747         Layout.push_back(Data.get<const FieldDecl *>());
7748     }
7749   }
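  // Illustrative example (hypothetical types): for
  //   struct B { int x; };
  //   struct D : B { int y; int : 4; int z; };
  // getPlainLayout(D, Layout, /*AsBase=*/false) first recurses into the
  // non-empty base B and then appends D's own fields, yielding [x, y, z];
  // the unnamed bit-field is skipped, as bit-fields are handled separately.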
7750 
7751   /// Generate all the base pointers, section pointers, sizes, map types, and
7752   /// mappers for the extracted mappable expressions (all included in \a
7753   /// CombinedInfo). Also, for each item that relates to a device pointer, a
7754   /// pair of the relevant declaration and index where it occurs is appended to
7755   /// the device pointers info array.
7756   void generateAllInfoForClauses(
7757       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7758       llvm::OpenMPIRBuilder &OMPBuilder,
7759       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7760           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7761     // We have to process the component lists that relate with the same
7762     // declaration in a single chunk so that we can generate the map flags
7763     // correctly. Therefore, we organize all lists in a map.
7764     enum MapKind { Present, Allocs, Other, Total };
7765     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7766                     SmallVector<SmallVector<MapInfo, 8>, 4>>
7767         Info;
7768 
7769     // Helper function to fill the information map for the different supported
7770     // clauses.
7771     auto &&InfoGen =
7772         [&Info, &SkipVarSet](
7773             const ValueDecl *D, MapKind Kind,
7774             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7775             OpenMPMapClauseKind MapType,
7776             ArrayRef<OpenMPMapModifierKind> MapModifiers,
7777             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7778             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7779             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7780           if (SkipVarSet.contains(D))
7781             return;
7782           auto It = Info.find(D);
7783           if (It == Info.end())
7784             It = Info
7785                      .insert(std::make_pair(
7786                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7787                      .first;
7788           It->second[Kind].emplace_back(
7789               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7790               IsImplicit, Mapper, VarRef, ForDeviceAddr);
7791         };
7792 
7793     for (const auto *Cl : Clauses) {
7794       const auto *C = dyn_cast<OMPMapClause>(Cl);
7795       if (!C)
7796         continue;
7797       MapKind Kind = Other;
7798       if (llvm::is_contained(C->getMapTypeModifiers(),
7799                              OMPC_MAP_MODIFIER_present))
7800         Kind = Present;
7801       else if (C->getMapType() == OMPC_MAP_alloc)
7802         Kind = Allocs;
7803       const auto *EI = C->getVarRefs().begin();
7804       for (const auto L : C->component_lists()) {
7805         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7806         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7807                 C->getMapTypeModifiers(), std::nullopt,
7808                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7809                 E);
7810         ++EI;
7811       }
7812     }
7813     for (const auto *Cl : Clauses) {
7814       const auto *C = dyn_cast<OMPToClause>(Cl);
7815       if (!C)
7816         continue;
7817       MapKind Kind = Other;
7818       if (llvm::is_contained(C->getMotionModifiers(),
7819                              OMPC_MOTION_MODIFIER_present))
7820         Kind = Present;
7821       const auto *EI = C->getVarRefs().begin();
7822       for (const auto L : C->component_lists()) {
7823         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7824                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7825                 C->isImplicit(), std::get<2>(L), *EI);
7826         ++EI;
7827       }
7828     }
7829     for (const auto *Cl : Clauses) {
7830       const auto *C = dyn_cast<OMPFromClause>(Cl);
7831       if (!C)
7832         continue;
7833       MapKind Kind = Other;
7834       if (llvm::is_contained(C->getMotionModifiers(),
7835                              OMPC_MOTION_MODIFIER_present))
7836         Kind = Present;
7837       const auto *EI = C->getVarRefs().begin();
7838       for (const auto L : C->component_lists()) {
7839         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7840                 std::nullopt, C->getMotionModifiers(),
7841                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7842                 *EI);
7843         ++EI;
7844       }
7845     }
7846 
7847     // Look at the use_device_ptr and use_device_addr clause information and
7848     // mark the existing map entries as such. If there is no map information for
7849     // an entry in the use_device_ptr and use_device_addr lists, we create one
7850     // with map type 'alloc' and a zero-size section. It is the user's fault if
7851     // that was not mapped before. If there is no map information and the
7852     // pointer is a struct member, then we defer the emission of that entry
7853     // until the whole struct has been processed.
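    // E.g. (illustrative):
    //   #pragma omp target data map(tofrom: p[0:N]) use_device_ptr(p)
    // marks the existing map entry for 'p' as a 'return pointer' entry; if
    // 'p' had not been mapped, a zero-size entry would be created instead.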
7854     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7855                     SmallVector<DeferredDevicePtrEntryTy, 4>>
7856         DeferredInfo;
7857     MapCombinedInfoTy UseDeviceDataCombinedInfo;
7858 
7859     auto &&UseDeviceDataCombinedInfoGen =
7860         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7861                                      CodeGenFunction &CGF, bool IsDevAddr) {
7862           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7863           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7864           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7865           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7866               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7867           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7868           UseDeviceDataCombinedInfo.Sizes.push_back(
7869               llvm::Constant::getNullValue(CGF.Int64Ty));
7870           UseDeviceDataCombinedInfo.Types.push_back(
7871               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7872           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7873         };
7874 
7875     auto &&MapInfoGen =
7876         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7877          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7878                    OMPClauseMappableExprCommon::MappableExprComponentListRef
7879                        Components,
7880                    bool IsImplicit, bool IsDevAddr) {
7881           // We didn't find any match in our map information; generate a
7882           // zero-size array section. If the pointer is a struct member, we
7883           // defer this action until the whole struct has been processed.
7884           if (isa<MemberExpr>(IE)) {
7885             // Insert the pointer into Info to be processed by
7886             // generateInfoForComponentList. Because it is a member pointer
7887             // without a pointee, no entry will be generated for it; therefore
7888             // we need to generate one after the whole struct has been
7889             // processed. Nonetheless, generateInfoForComponentList must be
7890             // called to take the pointer into account for the calculation of
7891             // the range of the partial struct.
7892             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7893                     std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7894                     nullptr, nullptr, IsDevAddr);
7895             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7896           } else {
7897             llvm::Value *Ptr;
7898             if (IsDevAddr) {
7899               if (IE->isGLValue())
7900                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7901               else
7902                 Ptr = CGF.EmitScalarExpr(IE);
7903             } else {
7904               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7905             }
7906             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7907           }
7908         };
7909 
7910     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7911                                     const Expr *IE, bool IsDevAddr) -> bool {
7912       // We potentially have map information for this declaration already.
7913       // Look for the first set of components that refer to it. If found,
7914       // return true.
7915       // If the first component is a member expression, we have to look into
7916       // 'this', which maps to null in the map of map information. Otherwise
7917       // look directly for the information.
7918       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7919       if (It != Info.end()) {
7920         bool Found = false;
7921         for (auto &Data : It->second) {
7922           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7923             return MI.Components.back().getAssociatedDeclaration() == VD;
7924           });
7925           // If we found a map entry, signal that the pointer has to be
7926           // returned and move on to the next declaration. Exclude cases where
7927           // the base pointer is mapped as an array subscript, an array section
7928           // or an array shaping expression. The base address is passed as a
7929           // pointer to the base in this case and cannot be used as a base for
7930           // a use_device_ptr list item.
7931           if (CI != Data.end()) {
7932             if (IsDevAddr) {
7933               CI->ForDeviceAddr = IsDevAddr;
7934               CI->ReturnDevicePointer = true;
7935               Found = true;
7936               break;
7937             } else {
7938               auto PrevCI = std::next(CI->Components.rbegin());
7939               const auto *VarD = dyn_cast<VarDecl>(VD);
7940               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7941                   isa<MemberExpr>(IE) ||
7942                   !VD->getType().getNonReferenceType()->isPointerType() ||
7943                   PrevCI == CI->Components.rend() ||
7944                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7945                   VarD->hasLocalStorage()) {
7946                 CI->ForDeviceAddr = IsDevAddr;
7947                 CI->ReturnDevicePointer = true;
7948                 Found = true;
7949                 break;
7950               }
7951             }
7952           }
7953         }
7954         return Found;
7955       }
7956       return false;
7957     };
7958 
7959     // Look at the use_device_ptr clause information and mark the existing map
7960     // entries as such. If there is no map information for an entry in the
7961     // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7962     // section. It is the user's fault if that was not mapped before. If there
7963     // is no map information and the pointer is a struct member, then we defer
7964     // the emission of that entry until the whole struct has been processed.
7965     for (const auto *Cl : Clauses) {
7966       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7967       if (!C)
7968         continue;
7969       for (const auto L : C->component_lists()) {
7970         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7971             std::get<1>(L);
7972         assert(!Components.empty() &&
7973                "Not expecting empty list of components!");
7974         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7975         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7976         const Expr *IE = Components.back().getAssociatedExpression();
7977         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
7978           continue;
7979         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
7980                    /*IsDevAddr=*/false);
7981       }
7982     }
7983 
7984     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7985     for (const auto *Cl : Clauses) {
7986       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
7987       if (!C)
7988         continue;
7989       for (const auto L : C->component_lists()) {
7990         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7991             std::get<1>(L);
7992         assert(!std::get<1>(L).empty() &&
7993                "Not expecting empty list of components!");
7994         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
7995         if (!Processed.insert(VD).second)
7996           continue;
7997         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7998         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
7999         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8000           continue;
8001         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8002                    /*IsDevAddr=*/true);
8003       }
8004     }
8005 
8006     for (const auto &Data : Info) {
8007       StructRangeInfoTy PartialStruct;
8008       // Current struct information:
8009       MapCombinedInfoTy CurInfo;
8010       // Current struct base information:
8011       MapCombinedInfoTy StructBaseCurInfo;
8012       const Decl *D = Data.first;
8013       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8014       for (const auto &M : Data.second) {
8015         for (const MapInfo &L : M) {
8016           assert(!L.Components.empty() &&
8017                  "Not expecting declaration with no component lists.");
8018 
8019           // Remember the current base pointer index.
8020           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8021           unsigned StructBasePointersIdx =
8022               StructBaseCurInfo.BasePointers.size();
8023           CurInfo.NonContigInfo.IsNonContiguous =
8024               L.Components.back().isNonContiguous();
8025           generateInfoForComponentList(
8026               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8027               CurInfo, StructBaseCurInfo, PartialStruct,
8028               /*IsFirstComponentList=*/false, L.IsImplicit,
8029               /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8030               L.VarRef);
8031 
8032           // If this entry relates to a device pointer, set the relevant
8033           // declaration and add the 'return pointer' flag.
8034           if (L.ReturnDevicePointer) {
8035             // Check whether a value was added to either CurInfo or
8036             // StructBaseCurInfo and error if no value was added to either of
8037             // them:
8038             assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8039                     StructBasePointersIdx <
8040                         StructBaseCurInfo.BasePointers.size()) &&
8041                    "Unexpected number of mapped base pointers.");
8042 
8043             // Choose a base pointer index which is always valid:
8044             const ValueDecl *RelevantVD =
8045                 L.Components.back().getAssociatedDeclaration();
8046             assert(RelevantVD &&
8047                    "No relevant declaration related to the device pointer!");
8048 
8049             // If StructBaseCurInfo has been updated this iteration then work on
8050             // the first new entry added to it, i.e. make sure that when multiple
8051             // values are added to any of the lists, the first value added is
8052             // the one modified by the assignments below (not the last value
8053             // added).
8054             if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8055               StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8056                   RelevantVD;
8057               StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8058                   L.ForDeviceAddr ? DeviceInfoTy::Address
8059                                   : DeviceInfoTy::Pointer;
8060               StructBaseCurInfo.Types[StructBasePointersIdx] |=
8061                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8062             } else {
8063               CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8064               CurInfo.DevicePointers[CurrentBasePointersIdx] =
8065                   L.ForDeviceAddr ? DeviceInfoTy::Address
8066                                   : DeviceInfoTy::Pointer;
8067               CurInfo.Types[CurrentBasePointersIdx] |=
8068                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8069             }
8070           }
8071         }
8072       }
8073 
8074       // Append any pending zero-length pointers which are struct members and
8075       // are used with use_device_ptr or use_device_addr.
8076       auto CI = DeferredInfo.find(Data.first);
8077       if (CI != DeferredInfo.end()) {
8078         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8079           llvm::Value *BasePtr;
8080           llvm::Value *Ptr;
8081           if (L.ForDeviceAddr) {
8082             if (L.IE->isGLValue())
8083               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8084             else
8085               Ptr = this->CGF.EmitScalarExpr(L.IE);
8086             BasePtr = Ptr;
8087             // Entry is RETURN_PARAM. Also, set the placeholder value
8088             // MEMBER_OF=FFFF so that the entry is later updated with the
8089             // correct value of MEMBER_OF.
8090             CurInfo.Types.push_back(
8091                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8092                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8093           } else {
8094             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8095             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8096                                              L.IE->getExprLoc());
8097             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8098             // placeholder value MEMBER_OF=FFFF so that the entry is later
8099             // updated with the correct value of MEMBER_OF.
8100             CurInfo.Types.push_back(
8101                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8102                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8103                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8104           }
8105           CurInfo.Exprs.push_back(L.VD);
8106           CurInfo.BasePointers.emplace_back(BasePtr);
8107           CurInfo.DevicePtrDecls.emplace_back(L.VD);
8108           CurInfo.DevicePointers.emplace_back(
8109               L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8110           CurInfo.Pointers.push_back(Ptr);
8111           CurInfo.Sizes.push_back(
8112               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8113           CurInfo.Mappers.push_back(nullptr);
8114         }
8115       }
8116 
8117       // Unify entries in one list making sure the struct mapping precedes the
8118       // individual fields:
8119       MapCombinedInfoTy UnionCurInfo;
8120       UnionCurInfo.append(StructBaseCurInfo);
8121       UnionCurInfo.append(CurInfo);
8122 
8123       // If there is an entry in PartialStruct it means we have a struct with
8124       // individual members mapped. Emit an extra combined entry.
8125       if (PartialStruct.Base.isValid()) {
8126         UnionCurInfo.NonContigInfo.Dims.push_back(0);
8127         // Emit a combined entry:
8128         emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8129                           /*IsMapThis*/ !VD, OMPBuilder, VD);
8130       }
8131 
8132       // We need to append the results of this capture to what we already have.
8133       CombinedInfo.append(UnionCurInfo);
8134     }
8135     // Append data for use_device_ptr clauses.
8136     CombinedInfo.append(UseDeviceDataCombinedInfo);
8137   }
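  // Illustrative end-to-end sketch (hypothetical): for
  //   struct S { int x; int y; } s;
  //   #pragma omp target map(tofrom: s.x, s.y)
  // the per-member entries are collected in CurInfo, PartialStruct records
  // the range from &s.x to &s.y, and emitCombinedEntry prepends a single
  // combined entry for the struct, with the member entries marked as
  // MEMBER_OF it.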
8138 
8139 public:
8140   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8141       : CurDir(&Dir), CGF(CGF) {
8142     // Extract firstprivate clause information.
8143     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8144       for (const auto *D : C->varlists())
8145         FirstPrivateDecls.try_emplace(
8146             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8147     // Extract implicit firstprivates from uses_allocators clauses.
8148     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8149       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8150         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8151         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8152           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8153                                         /*Implicit=*/true);
8154         else if (const auto *VD = dyn_cast<VarDecl>(
8155                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8156                          ->getDecl()))
8157           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8158       }
8159     }
8160     // Extract device pointer clause information.
8161     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8162       for (auto L : C->component_lists())
8163         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8164     // Extract device addr clause information.
8165     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8166       for (auto L : C->component_lists())
8167         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8168     // Extract map information.
8169     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8170       if (C->getMapType() != OMPC_MAP_to)
8171         continue;
8172       for (auto L : C->component_lists()) {
8173         const ValueDecl *VD = std::get<0>(L);
8174         const auto *RD = VD ? VD->getType()
8175                                   .getCanonicalType()
8176                                   .getNonReferenceType()
8177                                   ->getAsCXXRecordDecl()
8178                             : nullptr;
8179         if (RD && RD->isLambda())
8180           LambdasMap.try_emplace(std::get<0>(L), C);
8181       }
8182     }
8183   }
8184 
8185   /// Constructor for the declare mapper directive.
8186   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8187       : CurDir(&Dir), CGF(CGF) {}
8188 
8189   /// Generate code for the combined entry if we have a partially mapped struct
8190   /// and take care of the mapping flags of the arguments corresponding to
8191   /// individual struct members.
8192   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8193                          MapFlagsArrayTy &CurTypes,
8194                          const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8195                          llvm::OpenMPIRBuilder &OMPBuilder,
8196                          const ValueDecl *VD = nullptr,
8197                          bool NotTargetParams = true) const {
8198     if (CurTypes.size() == 1 &&
8199         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8200          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8201         !PartialStruct.IsArraySection)
8202       return;
8203     Address LBAddr = PartialStruct.LowestElem.second;
8204     Address HBAddr = PartialStruct.HighestElem.second;
8205     if (PartialStruct.HasCompleteRecord) {
8206       LBAddr = PartialStruct.LB;
8207       HBAddr = PartialStruct.LB;
8208     }
8209     CombinedInfo.Exprs.push_back(VD);
8210     // Base is the base of the struct
8211     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8212     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8213     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8214     // Pointer is the address of the lowest element
8215     llvm::Value *LB = LBAddr.getPointer();
8216     const CXXMethodDecl *MD =
8217         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8218     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8219     bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8220     // There should not be a mapper for a combined entry.
8221     if (HasBaseClass) {
8222       // OpenMP 5.2 148:21:
8223       // If the target construct is within a class non-static member function,
8224       // and a variable is an accessible data member of the object for which the
8225       // non-static data member function is invoked, the variable is treated as
8226       // if the this[:1] expression had appeared in a map clause with a map-type
8227       // of tofrom.
8228       // Emit this[:1]
8229       CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8230       QualType Ty = MD->getFunctionObjectParameterType();
8231       llvm::Value *Size =
8232           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8233                                     /*isSigned=*/true);
8234       CombinedInfo.Sizes.push_back(Size);
8235     } else {
8236       CombinedInfo.Pointers.push_back(LB);
8237       // Size is (addr of {highest+1} element) - (addr of lowest element)
8238       llvm::Value *HB = HBAddr.getPointer();
8239       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8240           HBAddr.getElementType(), HB, /*Idx0=*/1);
8241       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8242       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8243       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8244       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8245                                                     /*isSigned=*/false);
8246       CombinedInfo.Sizes.push_back(Size);
8247     }
8248     CombinedInfo.Mappers.push_back(nullptr);
8249     // The map type is always TARGET_PARAM when we generate info for captures.
8250     CombinedInfo.Types.push_back(
8251         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8252                         : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8253     // If any element has the present modifier, then make sure the runtime
8254     // doesn't attempt to allocate the struct.
8255     if (CurTypes.end() !=
8256         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8257           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8258               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8259         }))
8260       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8261     // Remove TARGET_PARAM flag from the first element
8262     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8263     // If any element has the ompx_hold modifier, then make sure the runtime
8264     // uses the hold reference count for the struct as a whole so that it won't
8265     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8266     // elements as well so the runtime knows which reference count to check
8267     // when determining whether it's time for device-to-host transfers of
8268     // individual elements.
8269     if (CurTypes.end() !=
8270         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8271           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8272               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8273         })) {
8274       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8275       for (auto &M : CurTypes)
8276         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8277     }
8278 
8279     // All other current entries will be MEMBER_OF the combined entry
8280     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8281     // 0xFFFF in the MEMBER_OF field).
8282     OpenMPOffloadMappingFlags MemberOfFlag =
8283         OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8284     for (auto &M : CurTypes)
8285       OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8286   }
8287 
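       // Illustrative note (not part of the upstream source): a hedged sketch
       // of a source pattern that reaches emitCombinedEntry. Mapping two
       // members of the same struct produces one combined entry covering the
       // range from the lowest to the highest mapped element, with the member
       // entries marked MEMBER_OF it:
       //
       // \code
       // struct S { int a; double b; int c; } s;
       // // Maps 's.a' and 's.c'; the combined entry spans &s.a .. &s.c + 1.
       // #pragma omp target map(to : s.a, s.c)
       // { int t = s.a + s.c; (void)t; }
       // \endcode
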
8288   /// Generate all the base pointers, section pointers, sizes, map types, and
8289   /// mappers for the extracted mappable expressions (all included in \a
8290   /// CombinedInfo). Also, for each item that relates to a device pointer, a
8291   /// pair of the relevant declaration and index where it occurs is appended to
8292   /// the device pointers info array.
8293   void generateAllInfo(
8294       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8295       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8296           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8297     assert(CurDir.is<const OMPExecutableDirective *>() &&
8298            "Expect an executable directive");
8299     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8300     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8301                               SkipVarSet);
8302   }
8303 
8304   /// Generate all the base pointers, section pointers, sizes, map types, and
8305   /// mappers for the extracted map clauses of a user-defined mapper (all
8306   /// included in \a CombinedInfo).
8307   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8308                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8309     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8310            "Expect a declare mapper directive");
8311     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8312     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8313                               OMPBuilder);
8314   }
8315 
8316   /// Emit capture info for variables captured by reference in lambdas.
8317   void generateInfoForLambdaCaptures(
8318       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8319       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8320     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8321     const auto *RD = VDType->getAsCXXRecordDecl();
8322     if (!RD || !RD->isLambda())
8323       return;
8324     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8325                    CGF.getContext().getDeclAlign(VD));
8326     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8327     llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8328     FieldDecl *ThisCapture = nullptr;
8329     RD->getCaptureFields(Captures, ThisCapture);
8330     if (ThisCapture) {
8331       LValue ThisLVal =
8332           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8333       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8334       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8335                                  VDLVal.getPointer(CGF));
8336       CombinedInfo.Exprs.push_back(VD);
8337       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8338       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8339       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8340       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8341       CombinedInfo.Sizes.push_back(
8342           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8343                                     CGF.Int64Ty, /*isSigned=*/true));
8344       CombinedInfo.Types.push_back(
8345           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8346           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8347           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8348           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8349       CombinedInfo.Mappers.push_back(nullptr);
8350     }
8351     for (const LambdaCapture &LC : RD->captures()) {
8352       if (!LC.capturesVariable())
8353         continue;
8354       const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8355       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8356         continue;
8357       auto It = Captures.find(VD);
8358       assert(It != Captures.end() && "Found lambda capture without field.");
8359       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8360       if (LC.getCaptureKind() == LCK_ByRef) {
8361         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8362         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8363                                    VDLVal.getPointer(CGF));
8364         CombinedInfo.Exprs.push_back(VD);
8365         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8366         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8367         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8368         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8369         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8370             CGF.getTypeSize(
8371                 VD->getType().getCanonicalType().getNonReferenceType()),
8372             CGF.Int64Ty, /*isSigned=*/true));
8373       } else {
8374         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8375         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8376                                    VDLVal.getPointer(CGF));
8377         CombinedInfo.Exprs.push_back(VD);
8378         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8379         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8380         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8381         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8382         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8383       }
8384       CombinedInfo.Types.push_back(
8385           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8386           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8387           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8388           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8389       CombinedInfo.Mappers.push_back(nullptr);
8390     }
8391   }
8392 
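       // Illustrative note (not part of the upstream source): a hedged example
       // of the lambda handling above. A lambda used inside a target region is
       // implicitly mapped, and each by-reference capture gets a PTR_AND_OBJ
       // entry so the device copy of the lambda refers to device data:
       //
       // \code
       // int x = 0;
       // auto l = [&x]() { x += 1; };
       // #pragma omp target map(to : l)
       // { l(); }
       // \endcode
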
8393   /// Set correct indices for lambda captures.
8394   void adjustMemberOfForLambdaCaptures(
8395       llvm::OpenMPIRBuilder &OMPBuilder,
8396       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8397       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8398       MapFlagsArrayTy &Types) const {
8399     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8400       // Set correct member_of idx for all implicit lambda captures.
8401       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8402                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8403                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8404                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8405         continue;
8406       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8407       assert(BasePtr && "Unable to find base lambda address.");
8408       int TgtIdx = -1;
8409       for (unsigned J = I; J > 0; --J) {
8410         unsigned Idx = J - 1;
8411         if (Pointers[Idx] != BasePtr)
8412           continue;
8413         TgtIdx = Idx;
8414         break;
8415       }
8416       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8417       // All other current entries will be MEMBER_OF the combined entry
8418       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8419       // 0xFFFF in the MEMBER_OF field).
8420       OpenMPOffloadMappingFlags MemberOfFlag =
8421           OMPBuilder.getMemberOfFlag(TgtIdx);
8422       OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8423     }
8424   }
8425 
8426   /// Generate the base pointers, section pointers, sizes, map types, and
8427   /// mappers associated with a given capture (all included in \a CombinedInfo).
8428   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8429                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8430                               StructRangeInfoTy &PartialStruct) const {
8431     assert(!Cap->capturesVariableArrayType() &&
8432            "Not expecting to generate map info for a variable array type!");
8433 
8434     // We need to know when we are generating information for the first component.
8435     const ValueDecl *VD = Cap->capturesThis()
8436                               ? nullptr
8437                               : Cap->getCapturedVar()->getCanonicalDecl();
8438 
8439     // For map(to: lambda): skip it here; it is processed in
8440     // generateDefaultMapInfo.
8441     if (LambdasMap.count(VD))
8442       return;
8443 
8444     // If this declaration appears in an is_device_ptr clause we just have to
8445     // pass the pointer by value. If it is a reference to a declaration, we just
8446     // pass its value.
8447     if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8448       CombinedInfo.Exprs.push_back(VD);
8449       CombinedInfo.BasePointers.emplace_back(Arg);
8450       CombinedInfo.DevicePtrDecls.emplace_back(VD);
8451       CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8452       CombinedInfo.Pointers.push_back(Arg);
8453       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8454           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8455           /*isSigned=*/true));
8456       CombinedInfo.Types.push_back(
8457           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8458           OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8459       CombinedInfo.Mappers.push_back(nullptr);
8460       return;
8461     }
8462 
8463     using MapData =
8464         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8465                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8466                    const ValueDecl *, const Expr *>;
8467     SmallVector<MapData, 4> DeclComponentLists;
8468     // For member field lists in is_device_ptr clauses, store them in
8469     // DeclComponentLists to generate the component info.
8470     static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8471     auto It = DevPointersMap.find(VD);
8472     if (It != DevPointersMap.end())
8473       for (const auto &MCL : It->second)
8474         DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8475                                         /*IsImplicit=*/ true, nullptr,
8476                                         nullptr);
8477     auto I = HasDevAddrsMap.find(VD);
8478     if (I != HasDevAddrsMap.end())
8479       for (const auto &MCL : I->second)
8480         DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8481                                         /*IsImplicit=*/ true, nullptr,
8482                                         nullptr);
8483     assert(CurDir.is<const OMPExecutableDirective *>() &&
8484            "Expect an executable directive");
8485     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8486     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8487       const auto *EI = C->getVarRefs().begin();
8488       for (const auto L : C->decl_component_lists(VD)) {
8489         const ValueDecl *VDecl, *Mapper;
8490         // The expression is not correct if the mapping is implicit.
8491         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8492         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8493         std::tie(VDecl, Components, Mapper) = L;
8494         assert(VDecl == VD && "We got information for the wrong declaration??");
8495         assert(!Components.empty() &&
8496                "Not expecting declaration with no component lists.");
8497         DeclComponentLists.emplace_back(Components, C->getMapType(),
8498                                         C->getMapTypeModifiers(),
8499                                         C->isImplicit(), Mapper, E);
8500         ++EI;
8501       }
8502     }
8503     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8504                                              const MapData &RHS) {
8505       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8506       OpenMPMapClauseKind MapType = std::get<1>(LHS);
8507       bool HasPresent =
8508           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8509       bool HasAllocs = MapType == OMPC_MAP_alloc;
8510       MapModifiers = std::get<2>(RHS);
8511       MapType = std::get<1>(RHS);
8512       bool HasPresentR =
8513           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8514       bool HasAllocsR = MapType == OMPC_MAP_alloc;
8515       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8516     });
8517 
8518     // Find overlapping elements (including the offset from the base element).
8519     llvm::SmallDenseMap<
8520         const MapData *,
8521         llvm::SmallVector<
8522             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8523         4>
8524         OverlappedData;
8525     size_t Count = 0;
8526     for (const MapData &L : DeclComponentLists) {
8527       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8528       OpenMPMapClauseKind MapType;
8529       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8530       bool IsImplicit;
8531       const ValueDecl *Mapper;
8532       const Expr *VarRef;
8533       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8534           L;
8535       ++Count;
8536       for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8537         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8538         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8539                  VarRef) = L1;
8540         auto CI = Components.rbegin();
8541         auto CE = Components.rend();
8542         auto SI = Components1.rbegin();
8543         auto SE = Components1.rend();
8544         for (; CI != CE && SI != SE; ++CI, ++SI) {
8545           if (CI->getAssociatedExpression()->getStmtClass() !=
8546               SI->getAssociatedExpression()->getStmtClass())
8547             break;
8548           // Are we dealing with different variables/fields?
8549           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8550             break;
8551         }
8552         // We found an overlap if, for at least one of the lists, we reached
8553         // the head of the component list.
8554         if (CI == CE || SI == SE) {
8555           // Ignore it if it is the same component.
8556           if (CI == CE && SI == SE)
8557             continue;
8558           const auto It = (SI == SE) ? CI : SI;
8559           // If one component is a pointer and the other is some kind of
8560           // dereference of that pointer (array subscript, section,
8561           // dereference, etc.), it is not an overlap.
8562           // The same holds if one component is a base and the other is a
8563           // dereferenced pointer MemberExpr with the same base.
8564           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8565               (std::prev(It)->getAssociatedDeclaration() &&
8566                std::prev(It)
8567                    ->getAssociatedDeclaration()
8568                    ->getType()
8569                    ->isPointerType()) ||
8570               (It->getAssociatedDeclaration() &&
8571                It->getAssociatedDeclaration()->getType()->isPointerType() &&
8572                std::next(It) != CE && std::next(It) != SE))
8573             continue;
8574           const MapData &BaseData = CI == CE ? L : L1;
8575           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8576               SI == SE ? Components : Components1;
8577           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8578           OverlappedElements.getSecond().push_back(SubData);
8579         }
8580       }
8581     }
8582     // Sort the overlapped elements for each item.
8583     llvm::SmallVector<const FieldDecl *, 4> Layout;
8584     if (!OverlappedData.empty()) {
8585       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8586       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8587       while (BaseType != OrigType) {
8588         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8589         OrigType = BaseType->getPointeeOrArrayElementType();
8590       }
8591 
8592       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8593         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8594       else {
8595         const auto *RD = BaseType->getAsRecordDecl();
8596         Layout.append(RD->field_begin(), RD->field_end());
8597       }
8598     }
8599     for (auto &Pair : OverlappedData) {
8600       llvm::stable_sort(
8601           Pair.getSecond(),
8602           [&Layout](
8603               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8604               OMPClauseMappableExprCommon::MappableExprComponentListRef
8605                   Second) {
8606             auto CI = First.rbegin();
8607             auto CE = First.rend();
8608             auto SI = Second.rbegin();
8609             auto SE = Second.rend();
8610             for (; CI != CE && SI != SE; ++CI, ++SI) {
8611               if (CI->getAssociatedExpression()->getStmtClass() !=
8612                   SI->getAssociatedExpression()->getStmtClass())
8613                 break;
8614               // Are we dealing with different variables/fields?
8615               if (CI->getAssociatedDeclaration() !=
8616                   SI->getAssociatedDeclaration())
8617                 break;
8618             }
8619 
8620             // Lists contain the same elements.
8621             if (CI == CE && SI == SE)
8622               return false;
8623 
8624             // A list with fewer elements is less than a list with more elements.
8625             if (CI == CE || SI == SE)
8626               return CI == CE;
8627 
8628             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8629             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8630             if (FD1->getParent() == FD2->getParent())
8631               return FD1->getFieldIndex() < FD2->getFieldIndex();
8632             const auto *It =
8633                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8634                   return FD == FD1 || FD == FD2;
8635                 });
8636             return *It == FD1;
8637           });
8638     }
8639 
8640     // Associate the info with the capture, because the mapping flags depend on
8641     // it. First, go through all of the elements that have overlapped elements.
8642     bool IsFirstComponentList = true;
8643     MapCombinedInfoTy StructBaseCombinedInfo;
8644     for (const auto &Pair : OverlappedData) {
8645       const MapData &L = *Pair.getFirst();
8646       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8647       OpenMPMapClauseKind MapType;
8648       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8649       bool IsImplicit;
8650       const ValueDecl *Mapper;
8651       const Expr *VarRef;
8652       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8653           L;
8654       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8655           OverlappedComponents = Pair.getSecond();
8656       generateInfoForComponentList(
8657           MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8658           StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8659           IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8660           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8661       IsFirstComponentList = false;
8662     }
8663     // Then go through the remaining elements without overlapped elements.
8664     for (const MapData &L : DeclComponentLists) {
8665       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8666       OpenMPMapClauseKind MapType;
8667       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8668       bool IsImplicit;
8669       const ValueDecl *Mapper;
8670       const Expr *VarRef;
8671       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8672           L;
8673       auto It = OverlappedData.find(&L);
8674       if (It == OverlappedData.end())
8675         generateInfoForComponentList(
8676             MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8677             StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8678             IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8679             /*ForDeviceAddr=*/false, VD, VarRef);
8680       IsFirstComponentList = false;
8681     }
8682   }
8683 
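       // Illustrative note (not part of the upstream source): a hedged example
       // of the overlap analysis above. Mapping a struct and one of its
       // members in the same construct makes the component lists overlap, so
       // the struct is emitted around the separately mapped member:
       //
       // \code
       // struct S { int a; int b; } s;
       // #pragma omp target map(tofrom : s) map(from : s.b)
       // { s.a = 1; s.b = 2; }
       // \endcode
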
8684   /// Generate the default map information for a given capture \a CI,
8685   /// record field declaration \a RI and captured value \a CV.
8686   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8687                               const FieldDecl &RI, llvm::Value *CV,
8688                               MapCombinedInfoTy &CombinedInfo) const {
8689     bool IsImplicit = true;
8690     // Do the default mapping.
8691     if (CI.capturesThis()) {
8692       CombinedInfo.Exprs.push_back(nullptr);
8693       CombinedInfo.BasePointers.push_back(CV);
8694       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8695       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8696       CombinedInfo.Pointers.push_back(CV);
8697       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8698       CombinedInfo.Sizes.push_back(
8699           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8700                                     CGF.Int64Ty, /*isSigned=*/true));
8701       // Default map type.
8702       CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8703                                    OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8704     } else if (CI.capturesVariableByCopy()) {
8705       const VarDecl *VD = CI.getCapturedVar();
8706       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8707       CombinedInfo.BasePointers.push_back(CV);
8708       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8709       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8710       CombinedInfo.Pointers.push_back(CV);
8711       if (!RI.getType()->isAnyPointerType()) {
8712         // We have to signal to the runtime which captures are passed by value
8713         // and are not pointers.
8714         CombinedInfo.Types.push_back(
8715             OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8716         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8717             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8718       } else {
8719         // Pointers are implicitly mapped with a zero size and no flags
8720         // (other than the first map, which is added for all implicit maps).
8721         CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8722         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8723       }
8724       auto I = FirstPrivateDecls.find(VD);
8725       if (I != FirstPrivateDecls.end())
8726         IsImplicit = I->getSecond();
8727     } else {
8728       assert(CI.capturesVariable() && "Expected captured reference.");
8729       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8730       QualType ElementType = PtrTy->getPointeeType();
8731       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8732           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8733       // The default map type for a scalar/complex type is 'to' because by
8734       // default the value doesn't have to be retrieved. For an aggregate
8735       // type, the default is 'tofrom'.
8736       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8737       const VarDecl *VD = CI.getCapturedVar();
8738       auto I = FirstPrivateDecls.find(VD);
8739       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8740       CombinedInfo.BasePointers.push_back(CV);
8741       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8742       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8743       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8744         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8745             CV, ElementType, CGF.getContext().getDeclAlign(VD),
8746             AlignmentSource::Decl));
8747         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8748       } else {
8749         CombinedInfo.Pointers.push_back(CV);
8750       }
8751       if (I != FirstPrivateDecls.end())
8752         IsImplicit = I->getSecond();
8753     }
8754     // Every default map produces a single argument which is a target parameter.
8755     CombinedInfo.Types.back() |=
8756         OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8757 
8758     // Add flag stating this is an implicit map.
8759     if (IsImplicit)
8760       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8761 
8762     // No user-defined mapper for default mapping.
8763     CombinedInfo.Mappers.push_back(nullptr);
8764   }
8765 };
8766 } // anonymous namespace
8767 
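     // Illustrative note (not part of the upstream source): a hedged sketch of
     // the defaults emitted by generateDefaultMapInfo above. A scalar captured
     // by copy is passed as a LITERAL argument; an aggregate captured by
     // reference is implicitly mapped tofrom; both get TARGET_PARAM and, by
     // default, IMPLICIT:
     //
     // \code
     // int n = 42;
     // int a[8];
     // #pragma omp target   // no map clauses: default mapping applies
     // { a[0] = n; }
     // \endcode
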
8768 // Try to extract the base declaration from a `this->x` expression if possible.
8769 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8770   if (!E)
8771     return nullptr;
8772 
8773   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8774     if (const MemberExpr *ME =
8775             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8776       return ME->getMemberDecl();
8777   return nullptr;
8778 }
8779 
8780 /// Emit a string constant containing the names of the values mapped to the
8781 /// offloading runtime library.
8782 llvm::Constant *
8783 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8784                        MappableExprsHandler::MappingExprInfo &MapExprs) {
8785 
8786   uint32_t SrcLocStrSize;
8787   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8788     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8789 
8790   SourceLocation Loc;
8791   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8792     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8793       Loc = VD->getLocation();
8794     else
8795       Loc = MapExprs.getMapExpr()->getExprLoc();
8796   } else {
8797     Loc = MapExprs.getMapDecl()->getLocation();
8798   }
8799 
8800   std::string ExprName;
8801   if (MapExprs.getMapExpr()) {
8802     PrintingPolicy P(CGF.getContext().getLangOpts());
8803     llvm::raw_string_ostream OS(ExprName);
8804     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8805     OS.flush();
8806   } else {
8807     ExprName = MapExprs.getMapDecl()->getNameAsString();
8808   }
8809 
8810   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8811   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8812                                          PLoc.getLine(), PLoc.getColumn(),
8813                                          SrcLocStrSize);
8814 }
8815 
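     // Illustrative note (not part of the upstream source): for a clause such
     // as 'map(tofrom : p[0:n])' the string built above combines the presumed
     // file name, the pretty-printed expression, and the line and column into
     // the ident-style ";...;...;line;col;;" form used by the offload runtime.
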
8816 /// Emit the arrays used to pass the captures and map information to the
8817 /// offloading runtime library. If there is no map or capture information,
8818 /// return nullptr by reference.
8819 static void emitOffloadingArrays(
8820     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8821     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8822     bool IsNonContiguous = false) {
8823   CodeGenModule &CGM = CGF.CGM;
8824 
8825   // Reset the array information.
8826   Info.clearArrayInfo();
8827   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8828 
8829   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8830   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8831                          CGF.AllocaInsertPt->getIterator());
8832   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8833                           CGF.Builder.GetInsertPoint());
8834 
8835   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8836     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8837   };
8838   if (CGM.getCodeGenOpts().getDebugInfo() !=
8839       llvm::codegenoptions::NoDebugInfo) {
8840     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8841     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8842                     FillInfoMap);
8843   }
8844 
8845   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8846     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8847       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8848     }
8849   };
8850 
8851   auto CustomMapperCB = [&](unsigned int I) {
8852     llvm::Value *MFunc = nullptr;
8853     if (CombinedInfo.Mappers[I]) {
8854       Info.HasMapper = true;
8855       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8856           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8857     }
8858     return MFunc;
8859   };
8860   OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8861                                   /*IsNonContiguous=*/true, DeviceAddrCB,
8862                                   CustomMapperCB);
8863 }
8864 
8865 /// Check for an inner distribute directive.
8866 static const OMPExecutableDirective *
8867 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8868   const auto *CS = D.getInnermostCapturedStmt();
8869   const auto *Body =
8870       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8871   const Stmt *ChildStmt =
8872       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8873 
8874   if (const auto *NestedDir =
8875           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8876     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8877     switch (D.getDirectiveKind()) {
8878     case OMPD_target:
8879       // For now, just treat 'target teams loop' as if it's distributed.
8880       if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8881         return NestedDir;
8882       if (DKind == OMPD_teams) {
8883         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8884             /*IgnoreCaptured=*/true);
8885         if (!Body)
8886           return nullptr;
8887         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8888         if (const auto *NND =
8889                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8890           DKind = NND->getDirectiveKind();
8891           if (isOpenMPDistributeDirective(DKind))
8892             return NND;
8893         }
8894       }
8895       return nullptr;
8896     case OMPD_target_teams:
8897       if (isOpenMPDistributeDirective(DKind))
8898         return NestedDir;
8899       return nullptr;
8900     case OMPD_target_parallel:
8901     case OMPD_target_simd:
8902     case OMPD_target_parallel_for:
8903     case OMPD_target_parallel_for_simd:
8904       return nullptr;
8905     case OMPD_target_teams_distribute:
8906     case OMPD_target_teams_distribute_simd:
8907     case OMPD_target_teams_distribute_parallel_for:
8908     case OMPD_target_teams_distribute_parallel_for_simd:
8909     case OMPD_parallel:
8910     case OMPD_for:
8911     case OMPD_parallel_for:
8912     case OMPD_parallel_master:
8913     case OMPD_parallel_sections:
8914     case OMPD_for_simd:
8915     case OMPD_parallel_for_simd:
8916     case OMPD_cancel:
8917     case OMPD_cancellation_point:
8918     case OMPD_ordered:
8919     case OMPD_threadprivate:
8920     case OMPD_allocate:
8921     case OMPD_task:
8922     case OMPD_simd:
8923     case OMPD_tile:
8924     case OMPD_unroll:
8925     case OMPD_sections:
8926     case OMPD_section:
8927     case OMPD_single:
8928     case OMPD_master:
8929     case OMPD_critical:
8930     case OMPD_taskyield:
8931     case OMPD_barrier:
8932     case OMPD_taskwait:
8933     case OMPD_taskgroup:
8934     case OMPD_atomic:
8935     case OMPD_flush:
8936     case OMPD_depobj:
8937     case OMPD_scan:
8938     case OMPD_teams:
8939     case OMPD_target_data:
8940     case OMPD_target_exit_data:
8941     case OMPD_target_enter_data:
8942     case OMPD_distribute:
8943     case OMPD_distribute_simd:
8944     case OMPD_distribute_parallel_for:
8945     case OMPD_distribute_parallel_for_simd:
8946     case OMPD_teams_distribute:
8947     case OMPD_teams_distribute_simd:
8948     case OMPD_teams_distribute_parallel_for:
8949     case OMPD_teams_distribute_parallel_for_simd:
8950     case OMPD_target_update:
8951     case OMPD_declare_simd:
8952     case OMPD_declare_variant:
8953     case OMPD_begin_declare_variant:
8954     case OMPD_end_declare_variant:
8955     case OMPD_declare_target:
8956     case OMPD_end_declare_target:
8957     case OMPD_declare_reduction:
8958     case OMPD_declare_mapper:
8959     case OMPD_taskloop:
8960     case OMPD_taskloop_simd:
8961     case OMPD_master_taskloop:
8962     case OMPD_master_taskloop_simd:
8963     case OMPD_parallel_master_taskloop:
8964     case OMPD_parallel_master_taskloop_simd:
8965     case OMPD_requires:
8966     case OMPD_metadirective:
8967     case OMPD_unknown:
8968     default:
8969       llvm_unreachable("Unexpected directive.");
8970     }
8971   }
8972 
8973   return nullptr;
8974 }
8975 
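     // Illustrative note (not part of the upstream source): a hedged example
     // of the nesting this helper detects. For a bare 'target' whose body is
     // a 'teams distribute' region, the inner directive is returned so the
     // loop trip count can be computed up front:
     //
     // \code
     // int a[1024];
     // #pragma omp target            // OMPD_target
     // #pragma omp teams distribute  // returned as the nested directive
     // for (int i = 0; i < 1024; ++i)
     //   a[i] = i;
     // \endcode
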
8976 /// Emit the user-defined mapper function. The code generation follows the
8977 /// pattern in the example below.
8978 /// \code
8979 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8980 ///                                           void *base, void *begin,
8981 ///                                           int64_t size, int64_t type,
8982 ///                                           void *name = nullptr) {
8983 ///   // Allocate space for an array section first or add a base/begin for
8984 ///   // pointer dereference.
8985 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
8986 ///       !maptype.IsDelete)
8987 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8988 ///                                 size*sizeof(Ty), clearToFromMember(type));
8989 ///   // Map members.
8990 ///   for (unsigned i = 0; i < size; i++) {
8991 ///     // For each component specified by this mapper:
8992 ///     for (auto c : begin[i]->all_components) {
8993 ///       if (c.hasMapper())
8994 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8995 ///                       c.arg_type, c.arg_name);
8996 ///       else
8997 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8998 ///                                     c.arg_begin, c.arg_size, c.arg_type,
8999 ///                                     c.arg_name);
9000 ///     }
9001 ///   }
9002 ///   // Delete the array section.
9003 ///   if (size > 1 && maptype.IsDelete)
9004 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9005 ///                                 size*sizeof(Ty), clearToFromMember(type));
9006 /// }
9007 /// \endcode
9008 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9009                                             CodeGenFunction *CGF) {
9010   if (UDMMap.count(D) > 0)
9011     return;
9012   ASTContext &C = CGM.getContext();
9013   QualType Ty = D->getType();
9014   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9015   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9016   auto *MapperVarDecl =
9017       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9018   SourceLocation Loc = D->getLocation();
9019   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9020   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9021 
9022   // Prepare mapper function arguments and attributes.
9023   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9024                               C.VoidPtrTy, ImplicitParamKind::Other);
9025   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9026                             ImplicitParamKind::Other);
9027   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9028                              C.VoidPtrTy, ImplicitParamKind::Other);
9029   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9030                             ImplicitParamKind::Other);
9031   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9032                             ImplicitParamKind::Other);
9033   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9034                             ImplicitParamKind::Other);
9035   FunctionArgList Args;
9036   Args.push_back(&HandleArg);
9037   Args.push_back(&BaseArg);
9038   Args.push_back(&BeginArg);
9039   Args.push_back(&SizeArg);
9040   Args.push_back(&TypeArg);
9041   Args.push_back(&NameArg);
9042   const CGFunctionInfo &FnInfo =
9043       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9044   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9045   SmallString<64> TyStr;
9046   llvm::raw_svector_ostream Out(TyStr);
9047   CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9048   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9049   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9050                                     Name, &CGM.getModule());
9051   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9052   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9053   // Start the mapper function code generation.
9054   CodeGenFunction MapperCGF(CGM);
9055   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9056   // Compute the starting and end addresses of array elements.
9057   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9058       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9059       C.getPointerType(Int64Ty), Loc);
9060   // Prepare common arguments for array initialization and deletion.
9061   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9062       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9063       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9064   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9065       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9066       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9067   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9068       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9069       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9070   // Convert the size in bytes into the number of array elements.
9071   Size = MapperCGF.Builder.CreateExactUDiv(
9072       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9073   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9074       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9075   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9076   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9077       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9078       C.getPointerType(Int64Ty), Loc);
9079   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9080       MapperCGF.GetAddrOfLocalVar(&NameArg),
9081       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9082 
9083   // Emit array initialization if this is an array section and \p MapType
9084   // indicates that memory allocation is required.
9085   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9086   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9087                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9088 
9089   // Emit a for loop to iterate through SizeArg elements and map all of them.
9090 
9091   // Emit the loop header block.
9092   MapperCGF.EmitBlock(HeadBB);
9093   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9094   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9095   // Evaluate whether the initial condition is satisfied.
9096   llvm::Value *IsEmpty =
9097       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9098   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9099   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9100 
9101   // Emit the loop body block.
9102   MapperCGF.EmitBlock(BodyBB);
9103   llvm::BasicBlock *LastBB = BodyBB;
9104   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9105       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9106   PtrPHI->addIncoming(PtrBegin, EntryBB);
9107   Address PtrCurrent(PtrPHI, ElemTy,
9108                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9109                          .getAlignment()
9110                          .alignmentOfArrayElement(ElementSize));
9111   // Privatize the mapper's declared variable to be the current array element.
9112   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9113   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9114   (void)Scope.Privatize();
9115 
9116   // Get map clause information. Fill up the arrays with all mapped variables.
9117   MappableExprsHandler::MapCombinedInfoTy Info;
9118   MappableExprsHandler MEHandler(*D, MapperCGF);
9119   MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9120 
9121   // Call the runtime API __tgt_mapper_num_components to get the number of
9122   // pre-existing components.
9123   llvm::Value *OffloadingArgs[] = {Handle};
9124   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9125       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9126                                             OMPRTL___tgt_mapper_num_components),
9127       OffloadingArgs);
9128   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9129       PreviousSize,
9130       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9131 
9132   // Fill up the runtime mapper handle for all components.
9133   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9134     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9135         Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9136     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9137         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9138     llvm::Value *CurSizeArg = Info.Sizes[I];
9139     llvm::Value *CurNameArg =
9140         (CGM.getCodeGenOpts().getDebugInfo() ==
9141          llvm::codegenoptions::NoDebugInfo)
9142             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9143             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9144 
9145     // Extract the MEMBER_OF field from the map type.
9146     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9147         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9148             Info.Types[I]));
9149     llvm::Value *MemberMapType =
9150         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9151 
9152     // Combine the map type inherited from user-defined mapper with that
9153     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9154     // bits of the \a MapType, which is the input argument of the mapper
9155     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9156     // bits of MemberMapType.
9157     // [OpenMP 5.0], 1.2.6. map-type decay.
9158     //        | alloc |  to   | from  | tofrom | release | delete
9159     // ----------------------------------------------------------
9160     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9161     // to     | alloc |  to   | alloc |   to   | release | delete
9162     // from   | alloc | alloc | from  |  from  | release | delete
9163     // tofrom | alloc |  to   | from  | tofrom | release | delete
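         // Illustrative note (not part of the upstream source): e.g. if the
         // mapper's map clause says 'to' but the program maps the variable
         // 'from', the member decays to 'alloc' per the table above, while
         // 'release' and 'delete' in \a MapType always win.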
9164     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9165         MapType,
9166         MapperCGF.Builder.getInt64(
9167             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9168                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9169                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9170     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9171     llvm::BasicBlock *AllocElseBB =
9172         MapperCGF.createBasicBlock("omp.type.alloc.else");
9173     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9174     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9175     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9176     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9177     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9178     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9179     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9180     MapperCGF.EmitBlock(AllocBB);
9181     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9182         MemberMapType,
9183         MapperCGF.Builder.getInt64(
9184             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9185                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9186                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9187     MapperCGF.Builder.CreateBr(EndBB);
9188     MapperCGF.EmitBlock(AllocElseBB);
9189     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9190         LeftToFrom,
9191         MapperCGF.Builder.getInt64(
9192             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9193                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9194     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9195     // In case of to, clear OMP_MAP_FROM.
9196     MapperCGF.EmitBlock(ToBB);
9197     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9198         MemberMapType,
9199         MapperCGF.Builder.getInt64(
9200             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9201                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9202     MapperCGF.Builder.CreateBr(EndBB);
9203     MapperCGF.EmitBlock(ToElseBB);
9204     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9205         LeftToFrom,
9206         MapperCGF.Builder.getInt64(
9207             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9208                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9209     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9210     // In case of from, clear OMP_MAP_TO.
9211     MapperCGF.EmitBlock(FromBB);
9212     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9213         MemberMapType,
9214         MapperCGF.Builder.getInt64(
9215             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9216                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9217     // In case of tofrom, do nothing.
9218     MapperCGF.EmitBlock(EndBB);
9219     LastBB = EndBB;
9220     llvm::PHINode *CurMapType =
9221         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9222     CurMapType->addIncoming(AllocMapType, AllocBB);
9223     CurMapType->addIncoming(ToMapType, ToBB);
9224     CurMapType->addIncoming(FromMapType, FromBB);
9225     CurMapType->addIncoming(MemberMapType, ToElseBB);
9226 
9227     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9228                                      CurSizeArg, CurMapType, CurNameArg};
9229     if (Info.Mappers[I]) {
9230       // Call the corresponding mapper function.
9231       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9232           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9233       assert(MapperFunc && "Expect a valid mapper function to be available.");
9234       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9235     } else {
9236       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9237       // data structure.
9238       MapperCGF.EmitRuntimeCall(
9239           OMPBuilder.getOrCreateRuntimeFunction(
9240               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9241           OffloadingArgs);
9242     }
9243   }
9244 
9245   // Update the pointer to point to the next element that needs to be mapped,
9246   // and check whether we have mapped all elements.
9247   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9248       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9249   PtrPHI->addIncoming(PtrNext, LastBB);
9250   llvm::Value *IsDone =
9251       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9252   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9253   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9254 
9255   MapperCGF.EmitBlock(ExitBB);
9256   // Emit array deletion if this is an array section and \p MapType indicates
9257   // that deletion is required.
9258   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9259                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9260 
9261   // Emit the function exit block.
9262   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9263   MapperCGF.FinishFunction();
9264   UDMMap.try_emplace(D, Fn);
9265   if (CGF) {
9266     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9267     Decls.second.push_back(D);
9268   }
9269 }
9270 
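     // Illustrative note (not part of the upstream source): a hedged example
     // of a declaration that triggers emitUserDefinedMapper. For each mapped
     // element of type S, the generated function pushes one component per map
     // clause of the mapper:
     //
     // \code
     // struct S { int len; double *data; };
     // #pragma omp declare mapper(id : S s) map(s) map(s.data[0 : s.len])
     // \endcode
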
9271 /// Emit the array initialization or deletion portion for user-defined mapper
9272 /// code generation. First, it evaluates whether an array section is mapped and
9273 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9274 /// true, and \a MapType indicates to not delete this array, array
9275 /// initialization code is generated. If \a IsInit is false, and \a MapType
9276 /// indicates to delete this array, array deletion code is generated.
9277 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9278     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9279     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9280     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9281     bool IsInit) {
9282   StringRef Prefix = IsInit ? ".init" : ".del";
9283 
9284   // Evaluate if this is an array section.
9285   llvm::BasicBlock *BodyBB =
9286       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9287   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9288       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9289   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9290       MapType,
9291       MapperCGF.Builder.getInt64(
9292           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9293               OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9294   llvm::Value *DeleteCond;
9295   llvm::Value *Cond;
9296   if (IsInit) {
9297     // base != begin?
9298     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9299     // IsPtrAndObj?
9300     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9301         MapType,
9302         MapperCGF.Builder.getInt64(
9303             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9304                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9305     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9306     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9307     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9308     DeleteCond = MapperCGF.Builder.CreateIsNull(
9309         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9310   } else {
9311     Cond = IsArray;
9312     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9313         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9314   }
9315   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9316   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9317 
9318   MapperCGF.EmitBlock(BodyBB);
9319   // Get the array size by multiplying element size and element number (i.e., \p
9320   // Size).
9321   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9322       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9323   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used
9324   // for memory allocation/deletion purposes only.
9325   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9326       MapType,
9327       MapperCGF.Builder.getInt64(
9328           ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9329               OpenMPOffloadMappingFlags::OMP_MAP_TO |
9330               OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9331   MapTypeArg = MapperCGF.Builder.CreateOr(
9332       MapTypeArg,
9333       MapperCGF.Builder.getInt64(
9334           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9335               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9336 
9337   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9338   // data structure.
9339   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9340                                    ArraySize, MapTypeArg, MapName};
9341   MapperCGF.EmitRuntimeCall(
9342       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9343                                             OMPRTL___tgt_push_mapper_component),
9344       OffloadingArgs);
9345 }
9346 
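/// Return the function implementing the user-defined mapper \a D, emitting it
/// first if it has not been emitted yet. Results are cached in UDMMap.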
9347 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9348     const OMPDeclareMapperDecl *D) {
9349   auto I = UDMMap.find(D);
9350   if (I != UDMMap.end())
9351     return I->second;
9352   emitUserDefinedMapper(D);
9353   return UDMMap.lookup(D);
9354 }
9355 
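/// Emit the number of iterations of the loop nest associated with the target
/// directive \a D (looking through a nested teams distribute directive when
/// necessary), or a constant 0 when no trip count can be computed. The result
/// feeds the trip-count argument of the kernel launch.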
9356 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9357     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9358     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9359                                      const OMPLoopDirective &D)>
9360         SizeEmitter) {
9361   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9362   const OMPExecutableDirective *TD = &D;
9363   // Get nested teams distribute kind directive, if any.
9364   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9365       Kind != OMPD_target_teams_loop)
9366     TD = getNestedDistributeDirective(CGM.getContext(), D);
9367   if (!TD)
9368     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9369 
9370   const auto *LD = cast<OMPLoopDirective>(TD);
9371   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9372     return NumIterations;
9373   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9374 }
9375 
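/// Emit the host fallback for a target region: an 'unreachable' when
/// offloading is mandatory, otherwise a call to the host version of the
/// outlined function.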
9376 static void
9377 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9378                        const OMPExecutableDirective &D,
9379                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9380                        bool RequiresOuterTask, const CapturedStmt &CS,
9381                        bool OffloadingMandatory, CodeGenFunction &CGF) {
9382   if (OffloadingMandatory) {
9383     CGF.Builder.CreateUnreachable();
9384   } else {
9385     if (RequiresOuterTask) {
9386       CapturedVars.clear();
9387       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9388     }
9389     OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9390                                          CapturedVars);
9391   }
9392 }
9393 
9394 static llvm::Value *emitDeviceID(
9395     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9396     CodeGenFunction &CGF) {
9397   // Emit device ID if any.
9398   llvm::Value *DeviceID;
9399   if (Device.getPointer()) {
9400     assert((Device.getInt() == OMPC_DEVICE_unknown ||
9401             Device.getInt() == OMPC_DEVICE_device_num) &&
9402            "Expected device_num modifier.");
9403     llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9404     DeviceID =
9405         CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9406   } else {
9407     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9408   }
9409   return DeviceID;
9410 }
9411 
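/// Emit the amount of dynamic cgroup memory requested with the
/// 'ompx_dyn_cgroup_mem' clause, or a constant 0 when the clause is absent.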
9412 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9413                                       CodeGenFunction &CGF) {
9414   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9415 
9416   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9417     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9418     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9419         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9420     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9421                                              /*isSigned=*/false);
9422   }
9423   return DynCGroupMem;
9424 }
9425 
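// Emit the device launch path of a target region: gather map information for
// every capture, materialize the offloading argument arrays, and defer to
// OpenMPIRBuilder::emitKernelLaunch, which emits the __tgt_target_kernel call
// and branches to the host fallback when the launch fails. An illustrative
// example of a directive handled here:
//   #pragma omp target map(tofrom : a[0:N]) device(dev) nowait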
9426 static void emitTargetCallKernelLaunch(
9427     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9428     const OMPExecutableDirective &D,
9429     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9430     const CapturedStmt &CS, bool OffloadingMandatory,
9431     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9432     llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9433     llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9434     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9435                                      const OMPLoopDirective &D)>
9436         SizeEmitter,
9437     CodeGenFunction &CGF, CodeGenModule &CGM) {
9438   llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9439 
9440   // Fill up the arrays with all the captured variables.
9441   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9442 
9443   // Get mappable expression information.
9444   MappableExprsHandler MEHandler(D, CGF);
9445   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9446   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9447 
9448   auto RI = CS.getCapturedRecordDecl()->field_begin();
9449   auto *CV = CapturedVars.begin();
9450   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9451                                             CE = CS.capture_end();
9452        CI != CE; ++CI, ++RI, ++CV) {
9453     MappableExprsHandler::MapCombinedInfoTy CurInfo;
9454     MappableExprsHandler::StructRangeInfoTy PartialStruct;
9455 
9456     // VLA sizes are passed to the outlined region by copy and do not have map
9457     // information associated.
9458     if (CI->capturesVariableArrayType()) {
9459       CurInfo.Exprs.push_back(nullptr);
9460       CurInfo.BasePointers.push_back(*CV);
9461       CurInfo.DevicePtrDecls.push_back(nullptr);
9462       CurInfo.DevicePointers.push_back(
9463           MappableExprsHandler::DeviceInfoTy::None);
9464       CurInfo.Pointers.push_back(*CV);
9465       CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9466           CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9467       // Copy to the device as an argument. No need to retrieve it.
9468       CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9469                               OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9470                               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9471       CurInfo.Mappers.push_back(nullptr);
9472     } else {
9473       // If we have any information in the map clause, we use it; otherwise we
9474       // just do a default mapping.
9475       MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9476       if (!CI->capturesThis())
9477         MappedVarSet.insert(CI->getCapturedVar());
9478       else
9479         MappedVarSet.insert(nullptr);
9480       if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9481         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9482       // Generate correct mapping for variables captured by reference in
9483       // lambdas.
9484       if (CI->capturesVariable())
9485         MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9486                                                 CurInfo, LambdaPointers);
9487     }
9488     // We expect to have at least an element of information for this capture.
9489     assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9490            "Non-existing map pointer for capture!");
9491     assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9492            CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9493            CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9494            CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9495            "Inconsistent map information sizes!");
9496 
9497     // If there is an entry in PartialStruct it means we have a struct with
9498     // individual members mapped. Emit an extra combined entry.
9499     if (PartialStruct.Base.isValid()) {
9500       CombinedInfo.append(PartialStruct.PreliminaryMapData);
9501       MEHandler.emitCombinedEntry(
9502           CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9503           OMPBuilder, nullptr,
9504           !PartialStruct.PreliminaryMapData.BasePointers.empty());
9505     }
9506 
9507     // We need to append the results of this capture to what we already have.
9508     CombinedInfo.append(CurInfo);
9509   }
9510   // Adjust MEMBER_OF flags for the lambdas captures.
9511   MEHandler.adjustMemberOfForLambdaCaptures(
9512       OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9513       CombinedInfo.Pointers, CombinedInfo.Types);
9514   // Map any list items in a map clause that were not captured because they
9515   // weren't referenced within the construct.
9516   MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9517 
9518   CGOpenMPRuntime::TargetDataInfo Info;
9519   // Fill up the arrays and create the arguments.
9520   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9521   bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9522                    llvm::codegenoptions::NoDebugInfo;
9523   OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9524                                           EmitDebug,
9525                                           /*ForEndCall=*/false);
9526 
9527   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9528   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9529                                         CGF.VoidPtrTy, CGM.getPointerAlign());
9530   InputInfo.PointersArray =
9531       Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9532   InputInfo.SizesArray =
9533       Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9534   InputInfo.MappersArray =
9535       Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9536   MapTypesArray = Info.RTArgs.MapTypesArray;
9537   MapNamesArray = Info.RTArgs.MapNamesArray;
9538 
9539   auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9540                     RequiresOuterTask, &CS, OffloadingMandatory, Device,
9541                     OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9542                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9543     bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9544 
9545     if (IsReverseOffloading) {
9546       // Reverse offloading is not supported, so just execute on the host.
9547       // FIXME: This fallback solution is incorrect since it ignores the
9548       // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9549       // assert here and ensure Sema emits an error.
9550       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9551                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9552       return;
9553     }
9554 
9555     bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9556     unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9557 
9558     llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
9559     llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
9560     llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
9561     llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();
9562 
9563     auto &&EmitTargetCallFallbackCB =
9564         [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9565          OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9566         -> llvm::OpenMPIRBuilder::InsertPointTy {
9567       CGF.Builder.restoreIP(IP);
9568       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9569                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9570       return CGF.Builder.saveIP();
9571     };
9572 
9573     llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9574     llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9575     llvm::Value *NumThreads =
9576         OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9577     llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9578     llvm::Value *NumIterations =
9579         OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9580     llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9581     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9582         CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9583 
9584     llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9585         BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9586         nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9587 
9588     llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9589         NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9590         DynCGGroupMem, HasNoWait);
9591 
9592     CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9593         CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9594         DeviceID, RTLoc, AllocaIP));
9595   };
9596 
9597   if (RequiresOuterTask)
9598     CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9599   else
9600     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9601 }
9602 
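/// Emit the host-only branch of a target call, taken when no device binary is
/// available for the region or when the 'if' clause guard evaluates to false.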
9603 static void
9604 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9605                    const OMPExecutableDirective &D,
9606                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9607                    bool RequiresOuterTask, const CapturedStmt &CS,
9608                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9609 
9610   // Notify that the host version must be executed.
9611   auto &&ElseGen =
9612       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9613        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9614         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9615                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9616       };
9617 
9618   if (RequiresOuterTask) {
9619     CodeGenFunction::OMPTargetDataInfo InputInfo;
9620     CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9621   } else {
9622     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9623   }
9624 }
9625 
9626 void CGOpenMPRuntime::emitTargetCall(
9627     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9628     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9629     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9630     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9631                                      const OMPLoopDirective &D)>
9632         SizeEmitter) {
9633   if (!CGF.HaveInsertPoint())
9634     return;
9635 
9636   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9637                                    CGM.getLangOpts().OpenMPOffloadMandatory;
9638 
9639   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9640 
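  // A target construct carrying depend, nowait, or in_reduction clauses (and,
  // from OpenMP 5.1 on, a thread_limit clause on task-generating constructs)
  // is emitted through the task-based path (EmitOMPTargetTaskBasedDirective)
  // rather than inlined.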
9641   const bool RequiresOuterTask =
9642       D.hasClausesOfKind<OMPDependClause>() ||
9643       D.hasClausesOfKind<OMPNowaitClause>() ||
9644       D.hasClausesOfKind<OMPInReductionClause>() ||
9645       (CGM.getLangOpts().OpenMP >= 51 &&
9646        needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9647        D.hasClausesOfKind<OMPThreadLimitClause>());
9648   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9649   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9650   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9651                                             PrePostActionTy &) {
9652     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9653   };
9654   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9655 
9656   CodeGenFunction::OMPTargetDataInfo InputInfo;
9657   llvm::Value *MapTypesArray = nullptr;
9658   llvm::Value *MapNamesArray = nullptr;
9659 
9660   auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9661                           RequiresOuterTask, &CS, OffloadingMandatory, Device,
9662                           OutlinedFnID, &InputInfo, &MapTypesArray,
9663                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9664                                                        PrePostActionTy &) {
9665     emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9666                                RequiresOuterTask, CS, OffloadingMandatory,
9667                                Device, OutlinedFnID, InputInfo, MapTypesArray,
9668                                MapNamesArray, SizeEmitter, CGF, CGM);
9669   };
9670 
9671   auto &&TargetElseGen =
9672       [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9673        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9674         emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9675                            CS, OffloadingMandatory, CGF);
9676       };
9677 
9678   // If we have a target function ID it means that we need to support
9679   // offloading; otherwise, just execute on the host. We need to execute on the
9680   // host regardless of the conditional in the if clause if, e.g., the user does
9681   // not specify target triples.
9682   if (OutlinedFnID) {
9683     if (IfCond) {
9684       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9685     } else {
9686       RegionCodeGenTy ThenRCG(TargetThenGen);
9687       ThenRCG(CGF);
9688     }
9689   } else {
9690     RegionCodeGenTy ElseRCG(TargetElseGen);
9691     ElseRCG(CGF);
9692   }
9693 }
9694 
9695 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9696                                                     StringRef ParentName) {
9697   if (!S)
9698     return;
9699 
9700   // Codegen OMP target directives that offload compute to the device.
9701   bool RequiresDeviceCodegen =
9702       isa<OMPExecutableDirective>(S) &&
9703       isOpenMPTargetExecutionDirective(
9704           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9705 
9706   if (RequiresDeviceCodegen) {
9707     const auto &E = *cast<OMPExecutableDirective>(S);
9708 
9709     llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9710         CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9711 
9712     // Is this a target region that should not be emitted as an entry point? If
9713     // so, just signal that we are done with this target region.
9714     if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9715       return;
9716 
9717     switch (E.getDirectiveKind()) {
9718     case OMPD_target:
9719       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9720                                                    cast<OMPTargetDirective>(E));
9721       break;
9722     case OMPD_target_parallel:
9723       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9724           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9725       break;
9726     case OMPD_target_teams:
9727       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9728           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9729       break;
9730     case OMPD_target_teams_distribute:
9731       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9732           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9733       break;
9734     case OMPD_target_teams_distribute_simd:
9735       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9736           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9737       break;
9738     case OMPD_target_parallel_for:
9739       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9740           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9741       break;
9742     case OMPD_target_parallel_for_simd:
9743       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9744           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9745       break;
9746     case OMPD_target_simd:
9747       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9748           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9749       break;
9750     case OMPD_target_teams_distribute_parallel_for:
9751       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9752           CGM, ParentName,
9753           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9754       break;
9755     case OMPD_target_teams_distribute_parallel_for_simd:
9756       CodeGenFunction::
9757           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9758               CGM, ParentName,
9759               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9760       break;
9761     case OMPD_target_teams_loop:
9762       CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9763           CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9764       break;
9765     case OMPD_target_parallel_loop:
9766       CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9767           CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9768       break;
9769     case OMPD_parallel:
9770     case OMPD_for:
9771     case OMPD_parallel_for:
9772     case OMPD_parallel_master:
9773     case OMPD_parallel_sections:
9774     case OMPD_for_simd:
9775     case OMPD_parallel_for_simd:
9776     case OMPD_cancel:
9777     case OMPD_cancellation_point:
9778     case OMPD_ordered:
9779     case OMPD_threadprivate:
9780     case OMPD_allocate:
9781     case OMPD_task:
9782     case OMPD_simd:
9783     case OMPD_tile:
9784     case OMPD_unroll:
9785     case OMPD_sections:
9786     case OMPD_section:
9787     case OMPD_single:
9788     case OMPD_master:
9789     case OMPD_critical:
9790     case OMPD_taskyield:
9791     case OMPD_barrier:
9792     case OMPD_taskwait:
9793     case OMPD_taskgroup:
9794     case OMPD_atomic:
9795     case OMPD_flush:
9796     case OMPD_depobj:
9797     case OMPD_scan:
9798     case OMPD_teams:
9799     case OMPD_target_data:
9800     case OMPD_target_exit_data:
9801     case OMPD_target_enter_data:
9802     case OMPD_distribute:
9803     case OMPD_distribute_simd:
9804     case OMPD_distribute_parallel_for:
9805     case OMPD_distribute_parallel_for_simd:
9806     case OMPD_teams_distribute:
9807     case OMPD_teams_distribute_simd:
9808     case OMPD_teams_distribute_parallel_for:
9809     case OMPD_teams_distribute_parallel_for_simd:
9810     case OMPD_target_update:
9811     case OMPD_declare_simd:
9812     case OMPD_declare_variant:
9813     case OMPD_begin_declare_variant:
9814     case OMPD_end_declare_variant:
9815     case OMPD_declare_target:
9816     case OMPD_end_declare_target:
9817     case OMPD_declare_reduction:
9818     case OMPD_declare_mapper:
9819     case OMPD_taskloop:
9820     case OMPD_taskloop_simd:
9821     case OMPD_master_taskloop:
9822     case OMPD_master_taskloop_simd:
9823     case OMPD_parallel_master_taskloop:
9824     case OMPD_parallel_master_taskloop_simd:
9825     case OMPD_requires:
9826     case OMPD_metadirective:
9827     case OMPD_unknown:
9828     default:
9829       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9830     }
9831     return;
9832   }
9833 
9834   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9835     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9836       return;
9837 
9838     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9839     return;
9840   }
9841 
9842   // If this is a lambda function, look into its body.
9843   if (const auto *L = dyn_cast<LambdaExpr>(S))
9844     S = L->getBody();
9845 
9846   // Keep looking for target regions recursively.
9847   for (const Stmt *II : S->children())
9848     scanForTargetRegionsFunctions(II, ParentName);
9849 }
9850 
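/// Return true if \p VD has a declare target 'device_type' that excludes it
/// from the current (host or device) compilation.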
9851 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9852   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9853       OMPDeclareTargetDeclAttr::getDeviceType(VD);
9854   if (!DevTy)
9855     return false;
9856   // Do not emit device_type(nohost) functions for the host.
9857   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9858     return true;
9859   // Do not emit device_type(host) functions for the device.
9860   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9861     return true;
9862   return false;
9863 }
9864 
9865 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9866   // If emitting code for the host, we do not process FD here. Instead we do
9867   // the normal code generation.
9868   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9869     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9870       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9871                                   CGM.getLangOpts().OpenMPIsTargetDevice))
9872         return true;
9873     return false;
9874   }
9875 
9876   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9877   // Try to detect target regions in the function.
9878   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9879     StringRef Name = CGM.getMangledName(GD);
9880     scanForTargetRegionsFunctions(FD->getBody(), Name);
9881     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9882                                 CGM.getLangOpts().OpenMPIsTargetDevice))
9883       return true;
9884   }
9885 
9886   // Do not emit the function if it is not marked as declare target.
9887   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9888          AlreadyEmittedTargetDecls.count(VD) == 0;
9889 }
9890 
9891 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9892   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9893                               CGM.getLangOpts().OpenMPIsTargetDevice))
9894     return true;
9895 
9896   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9897     return false;
9898 
9899   // Check if there are Ctors/Dtors in this declaration and look for target
9900   // regions in it. We use the complete variant to produce the kernel name
9901   // mangling.
9902   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9903   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9904     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9905       StringRef ParentName =
9906           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9907       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9908     }
9909     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9910       StringRef ParentName =
9911           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9912       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9913     }
9914   }
9915 
9916   // Do not emit the variable if it is not marked as declare target.
9917   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9918       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9919           cast<VarDecl>(GD.getDecl()));
9920   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9921       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9922         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9923        HasRequiresUnifiedSharedMemory)) {
9924     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9925     return true;
9926   }
9927   return false;
9928 }
9929 
9930 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9931                                                    llvm::Constant *Addr) {
9932   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9933       !CGM.getLangOpts().OpenMPIsTargetDevice)
9934     return;
9935 
9936   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9937       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9938 
9939   // If this is an 'extern' declaration we defer to the canonical definition and
9940   // do not emit an offloading entry.
9941   if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9942       VD->hasExternalStorage())
9943     return;
9944 
9945   if (!Res) {
9946     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9947       // Register non-target variables being emitted in device code (debug info
9948       // may cause this).
9949       StringRef VarName = CGM.getMangledName(VD);
9950       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9951     }
9952     return;
9953   }
9954 
9955   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9956   auto LinkageForVariable = [&VD, this]() {
9957     return CGM.getLLVMLinkageVarDefinition(VD);
9958   };
9959 
9960   std::vector<llvm::GlobalVariable *> GeneratedRefs;
9961   OMPBuilder.registerTargetGlobalVariable(
9962       convertCaptureClause(VD), convertDeviceClause(VD),
9963       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9964       VD->isExternallyVisible(),
9965       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9966                                   VD->getCanonicalDecl()->getBeginLoc()),
9967       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9968       CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9969       CGM.getTypes().ConvertTypeForMem(
9970           CGM.getContext().getPointerType(VD->getType())),
9971       Addr);
9972 
9973   for (auto *ref : GeneratedRefs)
9974     CGM.addCompilerUsedGlobal(ref);
9975 }
9976 
9977 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9978   if (isa<FunctionDecl>(GD.getDecl()) ||
9979       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9980     return emitTargetFunctions(GD);
9981 
9982   return emitTargetGlobalVariable(GD);
9983 }
9984 
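/// Emit the declare target variables whose emission was deferred: 'to' and
/// 'enter' variables get a full definition unless unified shared memory is
/// required, while 'link' variables (and 'to'/'enter' ones under unified
/// shared memory) only get their declare-target reference pointer.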
9985 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9986   for (const VarDecl *VD : DeferredGlobalVariables) {
9987     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9988         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9989     if (!Res)
9990       continue;
9991     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9992          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9993         !HasRequiresUnifiedSharedMemory) {
9994       CGM.EmitGlobal(VD);
9995     } else {
9996       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9997               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9998                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9999                HasRequiresUnifiedSharedMemory)) &&
10000              "Expected link clause or to clause with unified memory.");
10001       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10002     }
10003   }
10004 }
10005 
10006 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10007     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10008   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10009          " Expected target-based directive.");
10010 }
10011 
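/// Record the effects of an OpenMP 'requires' directive. E.g. (illustrative),
///   #pragma omp requires unified_shared_memory atomic_default_mem_order(seq_cst)
/// sets HasRequiresUnifiedSharedMemory and makes sequentially consistent
/// ordering the default returned by getDefaultMemoryOrdering().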
10012 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10013   for (const OMPClause *Clause : D->clauselists()) {
10014     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10015       HasRequiresUnifiedSharedMemory = true;
10016       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10017     } else if (const auto *AC =
10018                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10019       switch (AC->getAtomicDefaultMemOrderKind()) {
10020       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10021         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10022         break;
10023       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10024         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10025         break;
10026       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10027         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10028         break;
10029       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10030         break;
10031       }
10032     }
10033   }
10034 }
10035 
10036 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10037   return RequiresAtomicOrdering;
10038 }
10039 
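/// If \p VD was declared with an OpenMP 'allocate' directive, set \p AS to the
/// address space implied by its allocator and return true. All the predefined
/// allocators handled here currently map to the default address space.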
10040 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10041                                                        LangAS &AS) {
10042   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10043     return false;
10044   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10045   switch (A->getAllocatorType()) {
10046   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10047   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10048   // Not supported, fall back to the default mem space.
10049   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10050   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10051   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10052   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10053   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10054   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10055   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10056     AS = LangAS::Default;
10057     return true;
10058   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10059     llvm_unreachable("Expected predefined allocator for the variables with the "
10060                      "static storage.");
10061   }
10062   return false;
10063 }
10064 
10065 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10066   return HasRequiresUnifiedSharedMemory;
10067 }
10068 
10069 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10070     CodeGenModule &CGM)
10071     : CGM(CGM) {
10072   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10073     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10074     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10075   }
10076 }
10077 
10078 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10079   if (CGM.getLangOpts().OpenMPIsTargetDevice)
10080     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10081 }
10082 
10083 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10084   if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10085     return true;
10086 
10087   const auto *D = cast<FunctionDecl>(GD.getDecl());
10088   // Do not emit the function if it is marked as declare target, as it was
10089   // already emitted.
10090   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10091     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10092       if (auto *F = dyn_cast_or_null<llvm::Function>(
10093               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10094         return !F->isDeclaration();
10095       return false;
10096     }
10097     return true;
10098   }
10099 
10100   return !AlreadyEmittedTargetDecls.insert(D).second;
10101 }
10102 
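/// Create the global initializer-style function that registers the 'requires'
/// flags with the runtime via __tgt_register_requires, or return nullptr when
/// no registration is needed.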
10103 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10104   // If we don't have entries or if we are emitting code for the device, we
10105   // don't need to do anything.
10106   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10107       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
10108       (OMPBuilder.OffloadInfoManager.empty() &&
10109        !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10110     return nullptr;
10111 
10112   // Create and register the function that handles the requires directives.
10113   ASTContext &C = CGM.getContext();
10114 
10115   llvm::Function *RequiresRegFn;
10116   {
10117     CodeGenFunction CGF(CGM);
10118     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10119     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10120     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10121     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10122     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10123     // TODO: check for other requires clauses.
10124     // The requires directive takes effect only when a target region is
10125     // present in the compilation unit. Otherwise it is ignored and not
10126     // passed to the runtime. This prevents the runtime from throwing an error
10127     // for mismatching requires clauses across compilation units that don't
10128     // contain at least one target region.
10129     assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10130             !OMPBuilder.OffloadInfoManager.empty()) &&
10131            "Target or declare target region expected.");
10132     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10133                             CGM.getModule(), OMPRTL___tgt_register_requires),
10134                         llvm::ConstantInt::get(
10135                             CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
10136     CGF.FinishFunction();
10137   }
10138   return RequiresRegFn;
10139 }
10140 
10141 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10142                                     const OMPExecutableDirective &D,
10143                                     SourceLocation Loc,
10144                                     llvm::Function *OutlinedFn,
10145                                     ArrayRef<llvm::Value *> CapturedVars) {
10146   if (!CGF.HaveInsertPoint())
10147     return;
10148 
10149   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10150   CodeGenFunction::RunCleanupsScope Scope(CGF);
10151 
10152   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10153   llvm::Value *Args[] = {
10154       RTLoc,
10155       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10156       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10157   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10158   RealArgs.append(std::begin(Args), std::end(Args));
10159   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10160 
10161   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10162       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10163   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10164 }
10165 
10166 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10167                                          const Expr *NumTeams,
10168                                          const Expr *ThreadLimit,
10169                                          SourceLocation Loc) {
10170   if (!CGF.HaveInsertPoint())
10171     return;
10172 
10173   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10174 
10175   llvm::Value *NumTeamsVal =
10176       NumTeams
10177           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10178                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10179           : CGF.Builder.getInt32(0);
10180 
10181   llvm::Value *ThreadLimitVal =
10182       ThreadLimit
10183           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10184                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10185           : CGF.Builder.getInt32(0);
10186 
10187   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10188   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10189                                      ThreadLimitVal};
10190   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10191                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10192                       PushNumTeamsArgs);
10193 }
10194 
10195 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10196                                             const Expr *ThreadLimit,
10197                                             SourceLocation Loc) {
10198   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10199   llvm::Value *ThreadLimitVal =
10200       ThreadLimit
10201           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10202                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10203           : CGF.Builder.getInt32(0);
10204 
10205   // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10206   llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10207                                     ThreadLimitVal};
10208   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10209                           CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10210                       ThreadLimitArgs);
10211 }
10212 
10213 void CGOpenMPRuntime::emitTargetDataCalls(
10214     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10215     const Expr *Device, const RegionCodeGenTy &CodeGen,
10216     CGOpenMPRuntime::TargetDataInfo &Info) {
10217   if (!CGF.HaveInsertPoint())
10218     return;
10219 
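  // The construct is lowered through OpenMPIRBuilder::createTargetData, which
  // emits the paired __tgt_target_data_begin_mapper and
  // __tgt_target_data_end_mapper calls around the region body produced by the
  // BodyCB callback defined below.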
10220   // Action used to replace the default codegen action and turn privatization
10221   // off.
10222   PrePostActionTy NoPrivAction;
10223 
10224   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10225 
10226   llvm::Value *IfCondVal = nullptr;
10227   if (IfCond)
10228     IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10229 
10230   // Emit device ID if any.
10231   llvm::Value *DeviceID = nullptr;
10232   if (Device) {
10233     DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10234                                          CGF.Int64Ty, /*isSigned=*/true);
10235   } else {
10236     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10237   }
10238 
10239   // Fill up the arrays with all the mapped variables.
10240   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10241   auto GenMapInfoCB =
10242       [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10243     CGF.Builder.restoreIP(CodeGenIP);
10244     // Get map clause information.
10245     MappableExprsHandler MEHandler(D, CGF);
10246     MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10247 
10248     auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10249       return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10250     };
10251     if (CGM.getCodeGenOpts().getDebugInfo() !=
10252         llvm::codegenoptions::NoDebugInfo) {
10253       CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10254       llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10255                       FillInfoMap);
10256     }
10257 
10258     return CombinedInfo;
10259   };
10260   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10261   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10262     CGF.Builder.restoreIP(CodeGenIP);
10263     switch (BodyGenType) {
10264     case BodyGenTy::Priv:
10265       if (!Info.CaptureDeviceAddrMap.empty())
10266         CodeGen(CGF);
10267       break;
10268     case BodyGenTy::DupNoPriv:
10269       if (!Info.CaptureDeviceAddrMap.empty()) {
10270         CodeGen.setAction(NoPrivAction);
10271         CodeGen(CGF);
10272       }
10273       break;
10274     case BodyGenTy::NoPriv:
10275       if (Info.CaptureDeviceAddrMap.empty()) {
10276         CodeGen.setAction(NoPrivAction);
10277         CodeGen(CGF);
10278       }
10279       break;
10280     }
10281     return InsertPointTy(CGF.Builder.GetInsertBlock(),
10282                          CGF.Builder.GetInsertPoint());
10283   };
10284 
10285   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10286     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10287       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10288     }
10289   };
10290 
10291   auto CustomMapperCB = [&](unsigned int I) {
10292     llvm::Value *MFunc = nullptr;
10293     if (CombinedInfo.Mappers[I]) {
10294       Info.HasMapper = true;
10295       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10296           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10297     }
10298     return MFunc;
10299   };
10300 
10301   // Source location for the ident struct
10302   llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10303 
10304   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10305                          CGF.AllocaInsertPt->getIterator());
10306   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10307                           CGF.Builder.GetInsertPoint());
10308   llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10309   CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10310       OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10311       /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10312 }
10313 
10314 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10315     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10316     const Expr *Device) {
10317   if (!CGF.HaveInsertPoint())
10318     return;
10319 
10320   assert((isa<OMPTargetEnterDataDirective>(D) ||
10321           isa<OMPTargetExitDataDirective>(D) ||
10322           isa<OMPTargetUpdateDirective>(D)) &&
10323          "Expecting either target enter, exit data, or update directives.");
10324 
10325   CodeGenFunction::OMPTargetDataInfo InputInfo;
10326   llvm::Value *MapTypesArray = nullptr;
10327   llvm::Value *MapNamesArray = nullptr;
10328   // Generate the code for the opening of the data environment.
10329   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10330                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10331     // Emit device ID if any.
10332     llvm::Value *DeviceID = nullptr;
10333     if (Device) {
10334       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10335                                            CGF.Int64Ty, /*isSigned=*/true);
10336     } else {
10337       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10338     }
10339 
10340     // Emit the number of elements in the offloading arrays.
10341     llvm::Constant *PointerNum =
10342         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10343 
10344     // Source location for the ident struct
10345     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10346 
10347     llvm::Value *OffloadingArgs[] = {RTLoc,
10348                                      DeviceID,
10349                                      PointerNum,
10350                                      InputInfo.BasePointersArray.getPointer(),
10351                                      InputInfo.PointersArray.getPointer(),
10352                                      InputInfo.SizesArray.getPointer(),
10353                                      MapTypesArray,
10354                                      MapNamesArray,
10355                                      InputInfo.MappersArray.getPointer()};
10356 
10357     // Select the right runtime function call for each standalone
10358     // directive.
10359     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10360     RuntimeFunction RTLFn;
10361     switch (D.getDirectiveKind()) {
10362     case OMPD_target_enter_data:
10363       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10364                         : OMPRTL___tgt_target_data_begin_mapper;
10365       break;
10366     case OMPD_target_exit_data:
10367       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10368                         : OMPRTL___tgt_target_data_end_mapper;
10369       break;
10370     case OMPD_target_update:
10371       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10372                         : OMPRTL___tgt_target_data_update_mapper;
10373       break;
10374     case OMPD_parallel:
10375     case OMPD_for:
10376     case OMPD_parallel_for:
10377     case OMPD_parallel_master:
10378     case OMPD_parallel_sections:
10379     case OMPD_for_simd:
10380     case OMPD_parallel_for_simd:
10381     case OMPD_cancel:
10382     case OMPD_cancellation_point:
10383     case OMPD_ordered:
10384     case OMPD_threadprivate:
10385     case OMPD_allocate:
10386     case OMPD_task:
10387     case OMPD_simd:
10388     case OMPD_tile:
10389     case OMPD_unroll:
10390     case OMPD_sections:
10391     case OMPD_section:
10392     case OMPD_single:
10393     case OMPD_master:
10394     case OMPD_critical:
10395     case OMPD_taskyield:
10396     case OMPD_barrier:
10397     case OMPD_taskwait:
10398     case OMPD_taskgroup:
10399     case OMPD_atomic:
10400     case OMPD_flush:
10401     case OMPD_depobj:
10402     case OMPD_scan:
10403     case OMPD_teams:
10404     case OMPD_target_data:
10405     case OMPD_distribute:
10406     case OMPD_distribute_simd:
10407     case OMPD_distribute_parallel_for:
10408     case OMPD_distribute_parallel_for_simd:
10409     case OMPD_teams_distribute:
10410     case OMPD_teams_distribute_simd:
10411     case OMPD_teams_distribute_parallel_for:
10412     case OMPD_teams_distribute_parallel_for_simd:
10413     case OMPD_declare_simd:
10414     case OMPD_declare_variant:
10415     case OMPD_begin_declare_variant:
10416     case OMPD_end_declare_variant:
10417     case OMPD_declare_target:
10418     case OMPD_end_declare_target:
10419     case OMPD_declare_reduction:
10420     case OMPD_declare_mapper:
10421     case OMPD_taskloop:
10422     case OMPD_taskloop_simd:
10423     case OMPD_master_taskloop:
10424     case OMPD_master_taskloop_simd:
10425     case OMPD_parallel_master_taskloop:
10426     case OMPD_parallel_master_taskloop_simd:
10427     case OMPD_target:
10428     case OMPD_target_simd:
10429     case OMPD_target_teams_distribute:
10430     case OMPD_target_teams_distribute_simd:
10431     case OMPD_target_teams_distribute_parallel_for:
10432     case OMPD_target_teams_distribute_parallel_for_simd:
10433     case OMPD_target_teams:
10434     case OMPD_target_parallel:
10435     case OMPD_target_parallel_for:
10436     case OMPD_target_parallel_for_simd:
10437     case OMPD_requires:
10438     case OMPD_metadirective:
10439     case OMPD_unknown:
10440     default:
10441       llvm_unreachable("Unexpected standalone target data directive.");
10442       break;
10443     }
10444     CGF.EmitRuntimeCall(
10445         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10446         OffloadingArgs);
10447   };
10448 
10449   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10450                           &MapNamesArray](CodeGenFunction &CGF,
10451                                           PrePostActionTy &) {
10452     // Fill up the arrays with all the mapped variables.
10453     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10454 
10455     // Get map clause information.
10456     MappableExprsHandler MEHandler(D, CGF);
10457     MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10458 
10459     CGOpenMPRuntime::TargetDataInfo Info;
10460     // Fill up the arrays and create the arguments.
10461     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10462                          /*IsNonContiguous=*/true);
10463     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10464                              D.hasClausesOfKind<OMPNowaitClause>();
10465     bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10466                      llvm::codegenoptions::NoDebugInfo;
10467     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10468                                             EmitDebug,
10469                                             /*ForEndCall=*/false);
10470     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10471     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10472                                           CGF.VoidPtrTy, CGM.getPointerAlign());
10473     InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10474                                       CGM.getPointerAlign());
10475     InputInfo.SizesArray =
10476         Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10477     InputInfo.MappersArray =
10478         Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10479     MapTypesArray = Info.RTArgs.MapTypesArray;
10480     MapNamesArray = Info.RTArgs.MapNamesArray;
10481     if (RequiresOuterTask)
10482       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10483     else
10484       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10485   };
10486 
10487   if (IfCond) {
10488     emitIfClause(CGF, IfCond, TargetThenGen,
10489                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10490   } else {
10491     RegionCodeGenTy ThenRCG(TargetThenGen);
10492     ThenRCG(CGF);
10493   }
10494 }
10495 
10496 namespace {
10497 /// Kind of parameter in a function with 'declare simd' directive.
10498 enum ParamKindTy {
10499   Linear,
10500   LinearRef,
10501   LinearUVal,
10502   LinearVal,
10503   Uniform,
10504   Vector,
10505 };
10506 /// Attribute set of the parameter.
10507 struct ParamAttrTy {
10508   ParamKindTy Kind = Vector;
10509   llvm::APSInt StrideOrArg;
10510   llvm::APSInt Alignment;
10511   bool HasVarStride = false;
10512 };
10513 } // namespace
10514 
10515 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10516                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10517   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10518   // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10519   // argument of that clause. The VLEN value must be a power of 2.
10520   // Otherwise the notion of the function's "characteristic data type" (CDT)
10521   // is used to compute the vector length.
10522   // The CDT is defined in the following order:
10523   //   a) For a non-void function, the CDT is the return type.
10524   //   b) If the function has any non-uniform, non-linear parameters, the CDT
10525   //   is the type of the first such parameter.
10526   //   c) If the CDT determined by a) or b) above is a struct, union, or class
10527   //   type that is passed by value (except for the type that maps to the
10528   //   built-in complex data type), the CDT is int.
10529   //   d) If none of the above three cases is applicable, the CDT is int.
10530   // The VLEN is then determined based on the CDT and the vector register size
10531   // of the ISA for which the current vector version is generated. The VLEN is
10532   // computed using the formula below:
10533   //   VLEN = sizeof(vector_register) / sizeof(CDT),
10534   // where the vector register size is specified in section 3.2.1 "Registers
10535   // and the Stack Frame" of the original AMD64 ABI document.
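  // E.g. (illustrative): for 'double foo(double)' with a 512-bit vector
  // register, the CDT is double and VLEN = 512 / 64 = 8.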
10536   QualType RetType = FD->getReturnType();
10537   if (RetType.isNull())
10538     return 0;
10539   ASTContext &C = FD->getASTContext();
10540   QualType CDT;
10541   if (!RetType.isNull() && !RetType->isVoidType()) {
10542     CDT = RetType;
10543   } else {
10544     unsigned Offset = 0;
10545     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10546       if (ParamAttrs[Offset].Kind == Vector)
10547         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10548       ++Offset;
10549     }
10550     if (CDT.isNull()) {
10551       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10552         if (ParamAttrs[I + Offset].Kind == Vector) {
10553           CDT = FD->getParamDecl(I)->getType();
10554           break;
10555         }
10556       }
10557     }
10558   }
10559   if (CDT.isNull())
10560     CDT = C.IntTy;
10561   CDT = CDT->getCanonicalTypeUnqualified();
10562   if (CDT->isRecordType() || CDT->isUnionType())
10563     CDT = C.IntTy;
10564   return C.getTypeSize(CDT);
10565 }
10566 
10567 /// Mangle the parameter part of the vector function name according to
10568 /// their OpenMP classification. The mangling function is defined in
10569 /// section 4.5 of the AAVFABI(2021Q1).
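/// E.g. (illustrative), a uniform parameter followed by a linear parameter
/// with constant stride 2 and a vector parameter mangles as "ul2v".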
10570 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10571   SmallString<256> Buffer;
10572   llvm::raw_svector_ostream Out(Buffer);
10573   for (const auto &ParamAttr : ParamAttrs) {
10574     switch (ParamAttr.Kind) {
10575     case Linear:
10576       Out << 'l';
10577       break;
10578     case LinearRef:
10579       Out << 'R';
10580       break;
10581     case LinearUVal:
10582       Out << 'U';
10583       break;
10584     case LinearVal:
10585       Out << 'L';
10586       break;
10587     case Uniform:
10588       Out << 'u';
10589       break;
10590     case Vector:
10591       Out << 'v';
10592       break;
10593     }
10594     if (ParamAttr.HasVarStride)
10595       Out << "s" << ParamAttr.StrideOrArg;
10596     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10597              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10598       // Don't print the step value if it is not present or if it is
10599       // equal to 1.
10600       if (ParamAttr.StrideOrArg < 0)
10601         Out << 'n' << -ParamAttr.StrideOrArg;
10602       else if (ParamAttr.StrideOrArg != 1)
10603         Out << ParamAttr.StrideOrArg;
10604     }
10605 
10606     if (!!ParamAttr.Alignment)
10607       Out << 'a' << ParamAttr.Alignment;
10608   }
10609 
10610   return std::string(Out.str());
10611 }
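// For illustration, a hypothetical declaration
//   #pragma omp declare simd linear(p:4) uniform(n)
//   void foo(int *p, int n);
// mangles its parameters as "l16u": 'l' for the linear pointer, whose step 4
// is rescaled to 16 by sizeof(int) in emitDeclareSimdFunction below, and 'u'
// for the uniform parameter.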
10612 
10613 static void
10614 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10615                            const llvm::APSInt &VLENVal,
10616                            ArrayRef<ParamAttrTy> ParamAttrs,
10617                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10618   struct ISADataTy {
10619     char ISA;
10620     unsigned VecRegSize;
10621   };
10622   ISADataTy ISAData[] = {
10623       {'b', 128}, // SSE
10624       {'c', 256}, // AVX
10625       {'d', 256}, // AVX2
10626       {'e', 512}, // AVX512
10627   };
10636   llvm::SmallVector<char, 2> Masked;
10637   switch (State) {
10638   case OMPDeclareSimdDeclAttr::BS_Undefined:
10639     Masked.push_back('N');
10640     Masked.push_back('M');
10641     break;
10642   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10643     Masked.push_back('N');
10644     break;
10645   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10646     Masked.push_back('M');
10647     break;
10648   }
10649   for (char Mask : Masked) {
10650     for (const ISADataTy &Data : ISAData) {
10651       SmallString<256> Buffer;
10652       llvm::raw_svector_ostream Out(Buffer);
10653       Out << "_ZGV" << Data.ISA << Mask;
10654       if (!VLENVal) {
10655         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10656         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10657         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10658       } else {
10659         Out << VLENVal;
10660       }
10661       Out << mangleVectorParameters(ParamAttrs);
10662       Out << '_' << Fn->getName();
10663       Fn->addFnAttr(Out.str());
10664     }
10665   }
10666 }
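// For illustration: a hypothetical 'float foo(float x)' marked 'declare simd'
// with no simdlen clause and an undefined branch state gets one attribute per
// (mask, ISA) pair, e.g. "_ZGVbN4v_foo" (SSE, unmasked, VLEN = 128 / 32 = 4)
// and "_ZGVeM16v_foo" (AVX512, masked, VLEN = 512 / 32 = 16).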
10667 
10668 // These are the functions needed to mangle the names of the vector
10669 // functions generated by the compiler, according to the rules defined
10670 // in the "Vector Function ABI specifications for AArch64", available at
10671 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10673 
10674 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10675 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10676   QT = QT.getCanonicalType();
10677 
10678   if (QT->isVoidType())
10679     return false;
10680 
10681   if (Kind == ParamKindTy::Uniform)
10682     return false;
10683 
10684   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10685     return false;
10686 
10687   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10688       !QT->isReferenceType())
10689     return false;
10690 
10691   return true;
10692 }
10693 
10694 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10695 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10696   QT = QT.getCanonicalType();
10697   unsigned Size = C.getTypeSize(QT);
10698 
10699   // Only scalars and complex types at most 16 bytes wide set PBV to true.
10700   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10701     return false;
10702 
10703   if (QT->isFloatingType())
10704     return true;
10705 
10706   if (QT->isIntegerType())
10707     return true;
10708 
10709   if (QT->isPointerType())
10710     return true;
10711 
10712   // TODO: Add support for complex types (section 3.1.2, item 2).
10713 
10714   return false;
10715 }
10716 
10717 /// Computes the lane size (LS) of a return type or of an input parameter,
10718 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10719 /// TODO: Add support for references, section 3.2.1, item 1.
10720 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10721   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10722     QualType PTy = QT.getCanonicalType()->getPointeeType();
10723     if (getAArch64PBV(PTy, C))
10724       return C.getTypeSize(PTy);
10725   }
10726   if (getAArch64PBV(QT, C))
10727     return C.getTypeSize(QT);
10728 
10729   return C.getTypeSize(C.getUIntPtrType());
10730 }
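// For illustration: a uniform 'float *' parameter has LS = 32, the size of
// the pointee, since 'float' is PBV; for a uniform pointer to a 24-byte
// struct the pointee is not PBV, so LS falls back to the size of the pointer
// itself (64 bits).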
10731 
10732 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10733 // signature of the scalar function, as defined in 3.2.2 of the
10734 // AAVFABI.
10735 static std::tuple<unsigned, unsigned, bool>
10736 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10737   QualType RetType = FD->getReturnType().getCanonicalType();
10738 
10739   ASTContext &C = FD->getASTContext();
10740 
10741   bool OutputBecomesInput = false;
10742 
10743   llvm::SmallVector<unsigned, 8> Sizes;
10744   if (!RetType->isVoidType()) {
10745     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10746     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10747       OutputBecomesInput = true;
10748   }
10749   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10750     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10751     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10752   }
10753 
10754   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10755   // The LS of a function parameter / return value can only be a power
10756   // of 2, starting from 8 bits, up to 128.
10757   assert(llvm::all_of(Sizes,
10758                       [](unsigned Size) {
10759                         return Size == 8 || Size == 16 || Size == 32 ||
10760                                Size == 64 || Size == 128;
10761                       }) &&
10762          "Invalid size");
10763 
10764   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10765                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10766                          OutputBecomesInput);
10767 }
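// For illustration: a hypothetical 'double foo(float x, double *p)', with
// both parameters classified as vector, has lane sizes {64, 32, 64}, giving
// NDS = 32 and WDS = 64.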
10768 
10769 // Function used to add the attribute. The parameter `VLEN` is
10770 // templated to allow the use of "x" when targeting scalable functions
10771 // for SVE.
10772 template <typename T>
10773 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10774                                  char ISA, StringRef ParSeq,
10775                                  StringRef MangledName, bool OutputBecomesInput,
10776                                  llvm::Function *Fn) {
10777   SmallString<256> Buffer;
10778   llvm::raw_svector_ostream Out(Buffer);
10779   Out << Prefix << ISA << LMask << VLEN;
10780   if (OutputBecomesInput)
10781     Out << "v";
10782   Out << ParSeq << "_" << MangledName;
10783   Fn->addFnAttr(Out.str());
10784 }
10785 
10786 // Helper function to generate the Advanced SIMD names depending on
10787 // the value of the NDS when simdlen is not present.
10788 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10789                                       StringRef Prefix, char ISA,
10790                                       StringRef ParSeq, StringRef MangledName,
10791                                       bool OutputBecomesInput,
10792                                       llvm::Function *Fn) {
10793   switch (NDS) {
10794   case 8:
10795     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10796                          OutputBecomesInput, Fn);
10797     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10798                          OutputBecomesInput, Fn);
10799     break;
10800   case 16:
10801     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10802                          OutputBecomesInput, Fn);
10803     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10804                          OutputBecomesInput, Fn);
10805     break;
10806   case 32:
10807     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10808                          OutputBecomesInput, Fn);
10809     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10810                          OutputBecomesInput, Fn);
10811     break;
10812   case 64:
10813   case 128:
10814     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10815                          OutputBecomesInput, Fn);
10816     break;
10817   default:
10818     llvm_unreachable("Scalar type is too wide.");
10819   }
10820 }
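// For illustration: NDS = 32 with mask "N" emits both the 2-lane and the
// 4-lane variant above, matching the 64-bit and 128-bit Advanced SIMD
// register widths.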
10821 
10822 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10823 static void emitAArch64DeclareSimdFunction(
10824     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10825     ArrayRef<ParamAttrTy> ParamAttrs,
10826     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10827     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10828 
10829   // Get basic data for building the vector signature.
10830   const auto Data = getNDSWDS(FD, ParamAttrs);
10831   const unsigned NDS = std::get<0>(Data);
10832   const unsigned WDS = std::get<1>(Data);
10833   const bool OutputBecomesInput = std::get<2>(Data);
10834 
10835   // Check the values provided via `simdlen` by the user.
10836   // 1. A `simdlen(1)` doesn't produce vector signatures.
10837   if (UserVLEN == 1) {
10838     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10839         DiagnosticsEngine::Warning,
10840         "The clause simdlen(1) has no effect when targeting aarch64.");
10841     CGM.getDiags().Report(SLoc, DiagID);
10842     return;
10843   }
10844 
10845   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10846   // Advanced SIMD output.
10847   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10848     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10849         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10850                                     "power of 2 when targeting Advanced SIMD.");
10851     CGM.getDiags().Report(SLoc, DiagID);
10852     return;
10853   }
10854 
10855   // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
10856   // limits.
10857   if (ISA == 's' && UserVLEN != 0) {
10858     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10859       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10860           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10861                                       "lanes in the architectural constraints "
10862                                       "for SVE (min is 128-bit, max is "
10863                                       "2048-bit, by steps of 128-bit)");
10864       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10865       return;
10866     }
10867   }
10868 
10869   // Sort out parameter sequence.
10870   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10871   StringRef Prefix = "_ZGV";
10872   // Generate simdlen from user input (if any).
10873   if (UserVLEN) {
10874     if (ISA == 's') {
10875       // SVE generates only a masked function.
10876       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10877                            OutputBecomesInput, Fn);
10878     } else {
10879       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10880       // Advanced SIMD generates one or two functions, depending on
10881       // the `[not]inbranch` clause.
10882       switch (State) {
10883       case OMPDeclareSimdDeclAttr::BS_Undefined:
10884         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10885                              OutputBecomesInput, Fn);
10886         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10887                              OutputBecomesInput, Fn);
10888         break;
10889       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10890         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10891                              OutputBecomesInput, Fn);
10892         break;
10893       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10894         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10895                              OutputBecomesInput, Fn);
10896         break;
10897       }
10898     }
10899   } else {
10900     // If no user simdlen is provided, follow the AAVFABI rules for
10901     // generating the vector length.
10902     if (ISA == 's') {
10903       // SVE, section 3.4.1, item 1.
10904       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10905                            OutputBecomesInput, Fn);
10906     } else {
10907       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10908       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10909       // two vector names depending on the use of the clause
10910       // `[not]inbranch`.
10911       switch (State) {
10912       case OMPDeclareSimdDeclAttr::BS_Undefined:
10913         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10914                                   OutputBecomesInput, Fn);
10915         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10916                                   OutputBecomesInput, Fn);
10917         break;
10918       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10919         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10920                                   OutputBecomesInput, Fn);
10921         break;
10922       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10923         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10924                                   OutputBecomesInput, Fn);
10925         break;
10926       }
10927     }
10928   }
10929 }
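// For illustration: with SVE and no user simdlen, a hypothetical
// 'double foo(double x)' gets a single masked, length-agnostic variant,
// "_ZGVsMxv_foo"; with Advanced SIMD it instead gets fixed-length variants
// selected by the NDS as described above.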
10930 
10931 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10932                                               llvm::Function *Fn) {
10933   ASTContext &C = CGM.getContext();
10934   FD = FD->getMostRecentDecl();
10935   while (FD) {
10936     // Map params to their positions in function decl.
10937     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10938     if (isa<CXXMethodDecl>(FD))
10939       ParamPositions.try_emplace(FD, 0);
10940     unsigned ParamPos = ParamPositions.size();
10941     for (const ParmVarDecl *P : FD->parameters()) {
10942       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10943       ++ParamPos;
10944     }
10945     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10946       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10947       // Mark uniform parameters.
10948       for (const Expr *E : Attr->uniforms()) {
10949         E = E->IgnoreParenImpCasts();
10950         unsigned Pos;
10951         if (isa<CXXThisExpr>(E)) {
10952           Pos = ParamPositions[FD];
10953         } else {
10954           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10955                                 ->getCanonicalDecl();
10956           auto It = ParamPositions.find(PVD);
10957           assert(It != ParamPositions.end() && "Function parameter not found");
10958           Pos = It->second;
10959         }
10960         ParamAttrs[Pos].Kind = Uniform;
10961       }
10962       // Get alignment info.
10963       auto *NI = Attr->alignments_begin();
10964       for (const Expr *E : Attr->aligneds()) {
10965         E = E->IgnoreParenImpCasts();
10966         unsigned Pos;
10967         QualType ParmTy;
10968         if (isa<CXXThisExpr>(E)) {
10969           Pos = ParamPositions[FD];
10970           ParmTy = E->getType();
10971         } else {
10972           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10973                                 ->getCanonicalDecl();
10974           auto It = ParamPositions.find(PVD);
10975           assert(It != ParamPositions.end() && "Function parameter not found");
10976           Pos = It->second;
10977           ParmTy = PVD->getType();
10978         }
10979         ParamAttrs[Pos].Alignment =
10980             (*NI)
10981                 ? (*NI)->EvaluateKnownConstInt(C)
10982                 : llvm::APSInt::getUnsigned(
10983                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10984                           .getQuantity());
10985         ++NI;
10986       }
10987       // Mark linear parameters.
10988       auto *SI = Attr->steps_begin();
10989       auto *MI = Attr->modifiers_begin();
10990       for (const Expr *E : Attr->linears()) {
10991         E = E->IgnoreParenImpCasts();
10992         unsigned Pos;
10993         bool IsReferenceType = false;
10994         // Rescaling factor needed to compute the linear parameter
10995         // value in the mangled name.
10996         unsigned PtrRescalingFactor = 1;
10997         if (isa<CXXThisExpr>(E)) {
10998           Pos = ParamPositions[FD];
10999           auto *P = cast<PointerType>(E->getType());
11000           PtrRescalingFactor = CGM.getContext()
11001                                    .getTypeSizeInChars(P->getPointeeType())
11002                                    .getQuantity();
11003         } else {
11004           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11005                                 ->getCanonicalDecl();
11006           auto It = ParamPositions.find(PVD);
11007           assert(It != ParamPositions.end() && "Function parameter not found");
11008           Pos = It->second;
11009           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11010             PtrRescalingFactor = CGM.getContext()
11011                                      .getTypeSizeInChars(P->getPointeeType())
11012                                      .getQuantity();
11013           else if (PVD->getType()->isReferenceType()) {
11014             IsReferenceType = true;
11015             PtrRescalingFactor =
11016                 CGM.getContext()
11017                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11018                     .getQuantity();
11019           }
11020         }
11021         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11022         if (*MI == OMPC_LINEAR_ref)
11023           ParamAttr.Kind = LinearRef;
11024         else if (*MI == OMPC_LINEAR_uval)
11025           ParamAttr.Kind = LinearUVal;
11026         else if (IsReferenceType)
11027           ParamAttr.Kind = LinearVal;
11028         else
11029           ParamAttr.Kind = Linear;
11030         // Assuming a stride of 1, for `linear` without modifiers.
11031         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11032         if (*SI) {
11033           Expr::EvalResult Result;
11034           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11035             if (const auto *DRE =
11036                     dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11037               if (const auto *StridePVD =
11038                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11039                 ParamAttr.HasVarStride = true;
11040                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11041                 assert(It != ParamPositions.end() &&
11042                        "Function parameter not found");
11043                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11044               }
11045             }
11046           } else {
11047             ParamAttr.StrideOrArg = Result.Val.getInt();
11048           }
11049         }
11050         // If we are using a linear clause on a pointer, we need to
11051         // rescale the value of linear_step with the byte size of the
11052         // pointee type.
11053         if (!ParamAttr.HasVarStride &&
11054             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11055           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11056         ++SI;
11057         ++MI;
11058       }
11059       llvm::APSInt VLENVal;
11060       SourceLocation ExprLoc;
11061       const Expr *VLENExpr = Attr->getSimdlen();
11062       if (VLENExpr) {
11063         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11064         ExprLoc = VLENExpr->getExprLoc();
11065       }
11066       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11067       if (CGM.getTriple().isX86()) {
11068         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11069       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11070         unsigned VLEN = VLENVal.getExtValue();
11071         StringRef MangledName = Fn->getName();
11072         if (CGM.getTarget().hasFeature("sve"))
11073           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11074                                          MangledName, 's', 128, Fn, ExprLoc);
11075         else if (CGM.getTarget().hasFeature("neon"))
11076           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11077                                          MangledName, 'n', 128, Fn, ExprLoc);
11078       }
11079     }
11080     FD = FD->getPreviousDecl();
11081   }
11082 }
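// For illustration, a hypothetical x86 example:
//   #pragma omp declare simd simdlen(8) notinbranch uniform(n)
//   double add(double *a, int n);
// yields the function attributes "_ZGVbN8vu_add", "_ZGVcN8vu_add",
// "_ZGVdN8vu_add" and "_ZGVeN8vu_add", one per ISA.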
11083 
11084 namespace {
11085 /// Cleanup action for doacross support.
11086 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11087 public:
11088   static const int DoacrossFinArgs = 2;
11089 
11090 private:
11091   llvm::FunctionCallee RTLFn;
11092   llvm::Value *Args[DoacrossFinArgs];
11093 
11094 public:
11095   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11096                     ArrayRef<llvm::Value *> CallArgs)
11097       : RTLFn(RTLFn) {
11098     assert(CallArgs.size() == DoacrossFinArgs);
11099     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11100   }
11101   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11102     if (!CGF.HaveInsertPoint())
11103       return;
11104     CGF.EmitRuntimeCall(RTLFn, Args);
11105   }
11106 };
11107 } // namespace
11108 
11109 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11110                                        const OMPLoopDirective &D,
11111                                        ArrayRef<Expr *> NumIterations) {
11112   if (!CGF.HaveInsertPoint())
11113     return;
11114 
11115   ASTContext &C = CGM.getContext();
11116   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11117   RecordDecl *RD;
11118   if (KmpDimTy.isNull()) {
11119     // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
11120     //  kmp_int64 lo; // lower
11121     //  kmp_int64 up; // upper
11122     //  kmp_int64 st; // stride
11123     // };
11124     RD = C.buildImplicitRecord("kmp_dim");
11125     RD->startDefinition();
11126     addFieldToRecordDecl(C, RD, Int64Ty);
11127     addFieldToRecordDecl(C, RD, Int64Ty);
11128     addFieldToRecordDecl(C, RD, Int64Ty);
11129     RD->completeDefinition();
11130     KmpDimTy = C.getRecordType(RD);
11131   } else {
11132     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11133   }
11134   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11135   QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11136                                             ArraySizeModifier::Normal, 0);
11137 
11138   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11139   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11140   enum { LowerFD = 0, UpperFD, StrideFD };
11141   // Fill dims with data.
11142   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11143     LValue DimsLVal = CGF.MakeAddrLValue(
11144         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11145     // dims.upper = num_iterations;
11146     LValue UpperLVal = CGF.EmitLValueForField(
11147         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11148     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11149         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11150         Int64Ty, NumIterations[I]->getExprLoc());
11151     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11152     // dims.stride = 1;
11153     LValue StrideLVal = CGF.EmitLValueForField(
11154         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11155     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11156                           StrideLVal);
11157   }
11158 
11159   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11160   // kmp_int32 num_dims, struct kmp_dim * dims);
11161   llvm::Value *Args[] = {
11162       emitUpdateLocation(CGF, D.getBeginLoc()),
11163       getThreadID(CGF, D.getBeginLoc()),
11164       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11165       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11166           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11167           CGM.VoidPtrTy)};
11168 
11169   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11170       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11171   CGF.EmitRuntimeCall(RTLFn, Args);
11172   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11173       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11174   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11175       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11176   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11177                                              llvm::ArrayRef(FiniArgs));
11178 }
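// For illustration: for a loop with a single 'ordered(1)' dimension and N
// iterations, the code above roughly amounts to
//   struct kmp_dim dims = {/*lo=*/0, /*up=*/N, /*st=*/1};
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dims);
// with a matching __kmpc_doacross_fini(&loc, gtid) pushed as a cleanup.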
11179 
11180 template <typename T>
11181 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11182                                 const T *C, llvm::Value *ULoc,
11183                                 llvm::Value *ThreadID) {
11184   QualType Int64Ty =
11185       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11186   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11187   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11188       Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11189   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11190   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11191     const Expr *CounterVal = C->getLoopData(I);
11192     assert(CounterVal);
11193     llvm::Value *CntVal = CGF.EmitScalarConversion(
11194         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11195         CounterVal->getExprLoc());
11196     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11197                           /*Volatile=*/false, Int64Ty);
11198   }
11199   llvm::Value *Args[] = {
11200       ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11201   llvm::FunctionCallee RTLFn;
11202   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11203   OMPDoacrossKind<T> ODK;
11204   if (ODK.isSource(C)) {
11205     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11206                                                   OMPRTL___kmpc_doacross_post);
11207   } else {
11208     assert(ODK.isSink(C) && "Expect sink modifier.");
11209     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11210                                                   OMPRTL___kmpc_doacross_wait);
11211   }
11212   CGF.EmitRuntimeCall(RTLFn, Args);
11213 }
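// For illustration: '#pragma omp ordered depend(source)' lowers to
// __kmpc_doacross_post(&loc, gtid, cnt) and
// '#pragma omp ordered depend(sink : i-1)' to
// __kmpc_doacross_wait(&loc, gtid, cnt), where 'cnt' points to the loop
// counter values converted to kmp_int64.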
11214 
11215 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11216                                           const OMPDependClause *C) {
11217   return EmitDoacrossOrdered<OMPDependClause>(
11218       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11219       getThreadID(CGF, C->getBeginLoc()));
11220 }
11221 
11222 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11223                                           const OMPDoacrossClause *C) {
11224   return EmitDoacrossOrdered<OMPDoacrossClause>(
11225       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11226       getThreadID(CGF, C->getBeginLoc()));
11227 }
11228 
11229 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11230                                llvm::FunctionCallee Callee,
11231                                ArrayRef<llvm::Value *> Args) const {
11232   assert(Loc.isValid() && "Outlined function call location must be valid.");
11233   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11234 
11235   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11236     if (Fn->doesNotThrow()) {
11237       CGF.EmitNounwindRuntimeCall(Fn, Args);
11238       return;
11239     }
11240   }
11241   CGF.EmitRuntimeCall(Callee, Args);
11242 }
11243 
11244 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11245     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11246     ArrayRef<llvm::Value *> Args) const {
11247   emitCall(CGF, Loc, OutlinedFn, Args);
11248 }
11249 
11250 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11251   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11252     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11253       HasEmittedDeclareTargetRegion = true;
11254 }
11255 
11256 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11257                                              const VarDecl *NativeParam,
11258                                              const VarDecl *TargetParam) const {
11259   return CGF.GetAddrOfLocalVar(NativeParam);
11260 }
11261 
11262 /// Return the allocator value from the expression, or a null allocator
11263 /// (the default when no allocator is specified).
11264 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11265                                     const Expr *Allocator) {
11266   llvm::Value *AllocVal;
11267   if (Allocator) {
11268     AllocVal = CGF.EmitScalarExpr(Allocator);
11269     // According to the standard, the original allocator type is an enum
11270     // (integer). Convert to pointer type, if required.
11271     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11272                                         CGF.getContext().VoidPtrTy,
11273                                         Allocator->getExprLoc());
11274   } else {
11275     // If no allocator specified, it defaults to the null allocator.
11276     AllocVal = llvm::Constant::getNullValue(
11277         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11278   }
11279   return AllocVal;
11280 }
11281 
11282 /// Return the alignment from an allocate directive if present.
11283 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11284   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11285 
11286   if (!AllocateAlignment)
11287     return nullptr;
11288 
11289   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11290 }
11291 
11292 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11293                                                    const VarDecl *VD) {
11294   if (!VD)
11295     return Address::invalid();
11296   Address UntiedAddr = Address::invalid();
11297   Address UntiedRealAddr = Address::invalid();
11298   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11299   if (It != FunctionToUntiedTaskStackMap.end()) {
11300     const UntiedLocalVarsAddressesMap &UntiedData =
11301         UntiedLocalVarsStack[It->second];
11302     auto I = UntiedData.find(VD);
11303     if (I != UntiedData.end()) {
11304       UntiedAddr = I->second.first;
11305       UntiedRealAddr = I->second.second;
11306     }
11307   }
11308   const VarDecl *CVD = VD->getCanonicalDecl();
11309   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11310     // Use the default allocation.
11311     if (!isAllocatableDecl(VD))
11312       return UntiedAddr;
11313     llvm::Value *Size;
11314     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11315     if (CVD->getType()->isVariablyModifiedType()) {
11316       Size = CGF.getTypeSize(CVD->getType());
11317       // Align the size: ((size + align - 1) / align) * align
11318       Size = CGF.Builder.CreateNUWAdd(
11319           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11320       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11321       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11322     } else {
11323       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11324       Size = CGM.getSize(Sz.alignTo(Align));
11325     }
11326     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11327     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11328     const Expr *Allocator = AA->getAllocator();
11329     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11330     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11331     SmallVector<llvm::Value *, 4> Args;
11332     Args.push_back(ThreadID);
11333     if (Alignment)
11334       Args.push_back(Alignment);
11335     Args.push_back(Size);
11336     Args.push_back(AllocVal);
11337     llvm::omp::RuntimeFunction FnID =
11338         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11339     llvm::Value *Addr = CGF.EmitRuntimeCall(
11340         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11341         getName({CVD->getName(), ".void.addr"}));
11342     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11343         CGM.getModule(), OMPRTL___kmpc_free);
11344     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11345     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11346         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11347     if (UntiedAddr.isValid())
11348       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11349 
11350     // Cleanup action for allocate support.
11351     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11352       llvm::FunctionCallee RTLFn;
11353       SourceLocation::UIntTy LocEncoding;
11354       Address Addr;
11355       const Expr *AllocExpr;
11356 
11357     public:
11358       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11359                            SourceLocation::UIntTy LocEncoding, Address Addr,
11360                            const Expr *AllocExpr)
11361           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11362             AllocExpr(AllocExpr) {}
11363       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11364         if (!CGF.HaveInsertPoint())
11365           return;
11366         llvm::Value *Args[3];
11367         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11368             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11369         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11370             Addr.getPointer(), CGF.VoidPtrTy);
11371         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11372         Args[2] = AllocVal;
11373         CGF.EmitRuntimeCall(RTLFn, Args);
11374       }
11375     };
11376     Address VDAddr =
11377         UntiedRealAddr.isValid()
11378             ? UntiedRealAddr
11379             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11380     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11381         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11382         VDAddr, Allocator);
11383     if (UntiedRealAddr.isValid())
11384       if (auto *Region =
11385               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11386         Region->emitUntiedSwitch(CGF);
11387     return VDAddr;
11388   }
11389   return UntiedAddr;
11390 }
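// For illustration, a hypothetical declaration
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc) align(64)
// roughly lowers to
//   void *ptr = __kmpc_aligned_alloc(gtid, 64, sizeof(int), allocator);
// with __kmpc_free(gtid, ptr, allocator) registered as a cleanup.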
11391 
11392 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11393                                              const VarDecl *VD) const {
11394   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11395   if (It == FunctionToUntiedTaskStackMap.end())
11396     return false;
11397   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11398 }
11399 
11400 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11401     CodeGenModule &CGM, const OMPLoopDirective &S)
11402     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11403   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11404   if (!NeedToPush)
11405     return;
11406   NontemporalDeclsSet &DS =
11407       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11408   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11409     for (const Stmt *Ref : C->private_refs()) {
11410       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11411       const ValueDecl *VD;
11412       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11413         VD = DRE->getDecl();
11414       } else {
11415         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11416         assert((ME->isImplicitCXXThis() ||
11417                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11418                "Expected member of current class.");
11419         VD = ME->getMemberDecl();
11420       }
11421       DS.insert(VD);
11422     }
11423   }
11424 }
11425 
11426 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11427   if (!NeedToPush)
11428     return;
11429   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11430 }
11431 
11432 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11433     CodeGenFunction &CGF,
11434     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11435                           std::pair<Address, Address>> &LocalVars)
11436     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11437   if (!NeedToPush)
11438     return;
11439   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11440       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11441   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11442 }
11443 
11444 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11445   if (!NeedToPush)
11446     return;
11447   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11448 }
11449 
11450 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11451   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11452 
11453   return llvm::any_of(
11454       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11455       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11456 }
11457 
11458 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11459     const OMPExecutableDirective &S,
11460     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11461     const {
11462   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11463   // Vars in target/task regions must be excluded completely.
11464   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11465       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11466     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11467     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11468     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11469     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11470       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11471         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11472     }
11473   }
11474   // Exclude vars in private clauses.
11475   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11476     for (const Expr *Ref : C->varlists()) {
11477       if (!Ref->getType()->isScalarType())
11478         continue;
11479       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11480       if (!DRE)
11481         continue;
11482       NeedToCheckForLPCs.insert(DRE->getDecl());
11483     }
11484   }
11485   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11486     for (const Expr *Ref : C->varlists()) {
11487       if (!Ref->getType()->isScalarType())
11488         continue;
11489       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11490       if (!DRE)
11491         continue;
11492       NeedToCheckForLPCs.insert(DRE->getDecl());
11493     }
11494   }
11495   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11496     for (const Expr *Ref : C->varlists()) {
11497       if (!Ref->getType()->isScalarType())
11498         continue;
11499       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11500       if (!DRE)
11501         continue;
11502       NeedToCheckForLPCs.insert(DRE->getDecl());
11503     }
11504   }
11505   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11506     for (const Expr *Ref : C->varlists()) {
11507       if (!Ref->getType()->isScalarType())
11508         continue;
11509       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11510       if (!DRE)
11511         continue;
11512       NeedToCheckForLPCs.insert(DRE->getDecl());
11513     }
11514   }
11515   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11516     for (const Expr *Ref : C->varlists()) {
11517       if (!Ref->getType()->isScalarType())
11518         continue;
11519       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11520       if (!DRE)
11521         continue;
11522       NeedToCheckForLPCs.insert(DRE->getDecl());
11523     }
11524   }
11525   for (const Decl *VD : NeedToCheckForLPCs) {
11526     for (const LastprivateConditionalData &Data :
11527          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11528       if (Data.DeclToUniqueName.count(VD) > 0) {
11529         if (!Data.Disabled)
11530           NeedToAddForLPCsAsDisabled.insert(VD);
11531         break;
11532       }
11533     }
11534   }
11535 }
11536 
11537 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11538     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11539     : CGM(CGF.CGM),
11540       Action((CGM.getLangOpts().OpenMP >= 50 &&
11541               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11542                            [](const OMPLastprivateClause *C) {
11543                              return C->getKind() ==
11544                                     OMPC_LASTPRIVATE_conditional;
11545                            }))
11546                  ? ActionToDo::PushAsLastprivateConditional
11547                  : ActionToDo::DoNotPush) {
11548   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11549   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11550     return;
11551   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11552          "Expected a push action.");
11553   LastprivateConditionalData &Data =
11554       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11555   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11556     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11557       continue;
11558 
11559     for (const Expr *Ref : C->varlists()) {
11560       Data.DeclToUniqueName.insert(std::make_pair(
11561           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11562           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11563     }
11564   }
11565   Data.IVLVal = IVLVal;
11566   Data.Fn = CGF.CurFn;
11567 }
11568 
11569 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11570     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11571     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11572   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11573   if (CGM.getLangOpts().OpenMP < 50)
11574     return;
11575   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11576   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11577   if (!NeedToAddForLPCsAsDisabled.empty()) {
11578     Action = ActionToDo::DisableLastprivateConditional;
11579     LastprivateConditionalData &Data =
11580         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11581     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11582       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11583     Data.Fn = CGF.CurFn;
11584     Data.Disabled = true;
11585   }
11586 }
11587 
11588 CGOpenMPRuntime::LastprivateConditionalRAII
11589 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11590     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11591   return LastprivateConditionalRAII(CGF, S);
11592 }
11593 
11594 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11595   if (CGM.getLangOpts().OpenMP < 50)
11596     return;
11597   if (Action == ActionToDo::DisableLastprivateConditional) {
11598     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11599            "Expected list of disabled private vars.");
11600     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11601   }
11602   if (Action == ActionToDo::PushAsLastprivateConditional) {
11603     assert(
11604         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11605         "Expected list of lastprivate conditional vars.");
11606     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11607   }
11608 }
11609 
11610 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11611                                                         const VarDecl *VD) {
11612   ASTContext &C = CGM.getContext();
11613   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11614   if (I == LastprivateConditionalToTypes.end())
11615     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11616   QualType NewType;
11617   const FieldDecl *VDField;
11618   const FieldDecl *FiredField;
11619   LValue BaseLVal;
11620   auto VI = I->getSecond().find(VD);
11621   if (VI == I->getSecond().end()) {
11622     RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11623     RD->startDefinition();
11624     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11625     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11626     RD->completeDefinition();
11627     NewType = C.getRecordType(RD);
11628     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11629     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11630     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11631   } else {
11632     NewType = std::get<0>(VI->getSecond());
11633     VDField = std::get<1>(VI->getSecond());
11634     FiredField = std::get<2>(VI->getSecond());
11635     BaseLVal = std::get<3>(VI->getSecond());
11636   }
11637   LValue FiredLVal =
11638       CGF.EmitLValueForField(BaseLVal, FiredField);
11639   CGF.EmitStoreOfScalar(
11640       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11641       FiredLVal);
11642   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11643 }
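// For illustration: for 'lastprivate(conditional: a)' with 'int a', the
// private copy initialized above is backed by an implicit record roughly
// equivalent to
//   struct { int a; char Fired; };
// 'Fired' is zeroed here and set in inner regions when 'a' is written (see
// checkAndEmitLastprivateConditional).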
11644 
11645 namespace {
11646 /// Checks if the lastprivate conditional variable is referenced in LHS.
11647 class LastprivateConditionalRefChecker final
11648     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11649   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11650   const Expr *FoundE = nullptr;
11651   const Decl *FoundD = nullptr;
11652   StringRef UniqueDeclName;
11653   LValue IVLVal;
11654   llvm::Function *FoundFn = nullptr;
11655   SourceLocation Loc;
11656 
11657 public:
11658   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11659     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11660          llvm::reverse(LPM)) {
11661       auto It = D.DeclToUniqueName.find(E->getDecl());
11662       if (It == D.DeclToUniqueName.end())
11663         continue;
11664       if (D.Disabled)
11665         return false;
11666       FoundE = E;
11667       FoundD = E->getDecl()->getCanonicalDecl();
11668       UniqueDeclName = It->second;
11669       IVLVal = D.IVLVal;
11670       FoundFn = D.Fn;
11671       break;
11672     }
11673     return FoundE == E;
11674   }
11675   bool VisitMemberExpr(const MemberExpr *E) {
11676     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11677       return false;
11678     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11679          llvm::reverse(LPM)) {
11680       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11681       if (It == D.DeclToUniqueName.end())
11682         continue;
11683       if (D.Disabled)
11684         return false;
11685       FoundE = E;
11686       FoundD = E->getMemberDecl()->getCanonicalDecl();
11687       UniqueDeclName = It->second;
11688       IVLVal = D.IVLVal;
11689       FoundFn = D.Fn;
11690       break;
11691     }
11692     return FoundE == E;
11693   }
11694   bool VisitStmt(const Stmt *S) {
11695     for (const Stmt *Child : S->children()) {
11696       if (!Child)
11697         continue;
11698       if (const auto *E = dyn_cast<Expr>(Child))
11699         if (!E->isGLValue())
11700           continue;
11701       if (Visit(Child))
11702         return true;
11703     }
11704     return false;
11705   }
11706   explicit LastprivateConditionalRefChecker(
11707       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11708       : LPM(LPM) {}
11709   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11710   getFoundData() const {
11711     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11712   }
11713 };
11714 } // namespace
11715 
11716 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11717                                                        LValue IVLVal,
11718                                                        StringRef UniqueDeclName,
11719                                                        LValue LVal,
11720                                                        SourceLocation Loc) {
11721   // Last updated loop counter for the lastprivate conditional var.
11722   // int<xx> last_iv = 0;
11723   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11724   llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11725       LLIVTy, getName({UniqueDeclName, "iv"}));
11726   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11727       IVLVal.getAlignment().getAsAlign());
11728   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11729 
11730   // Last value of the lastprivate conditional.
11731   // decltype(priv_a) last_a;
11732   llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11733       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11734   Last->setAlignment(LVal.getAlignment().getAsAlign());
11735   LValue LastLVal = CGF.MakeAddrLValue(
11736       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
11737 
11738   // Global loop counter. Required to handle inner parallel-for regions.
11739   // iv
11740   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11741 
11742   // #pragma omp critical(a)
11743   // if (last_iv <= iv) {
11744   //   last_iv = iv;
11745   //   last_a = priv_a;
11746   // }
11747   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11748                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11749     Action.Enter(CGF);
11750     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11751     // (last_iv <= iv) ? Check if the variable was updated and, if so,
11752     // store the new value in the global var.
11753     llvm::Value *CmpRes;
11754     if (IVLVal.getType()->isSignedIntegerType()) {
11755       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11756     } else {
11757       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11758              "Loop iteration variable must be integer.");
11759       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11760     }
11761     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11762     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11763     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11764     // {
11765     CGF.EmitBlock(ThenBB);
11766 
11767     //   last_iv = iv;
11768     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11769 
11770     //   last_a = priv_a;
11771     switch (CGF.getEvaluationKind(LVal.getType())) {
11772     case TEK_Scalar: {
11773       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11774       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11775       break;
11776     }
11777     case TEK_Complex: {
11778       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11779       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11780       break;
11781     }
11782     case TEK_Aggregate:
11783       llvm_unreachable(
11784           "Aggregates are not supported in lastprivate conditional.");
11785     }
11786     // }
11787     CGF.EmitBranch(ExitBB);
11788     // There is no need to emit a line number for the unconditional branch.
11789     (void)ApplyDebugLocation::CreateEmpty(CGF);
11790     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11791   };
11792 
11793   if (CGM.getLangOpts().OpenMPSimd) {
11794     // Do not emit as a critical region as no parallel region could be emitted.
11795     RegionCodeGenTy ThenRCG(CodeGen);
11796     ThenRCG(CGF);
11797   } else {
11798     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11799   }
11800 }
11801 
11802 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11803                                                          const Expr *LHS) {
11804   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11805     return;
11806   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11807   if (!Checker.Visit(LHS))
11808     return;
11809   const Expr *FoundE;
11810   const Decl *FoundD;
11811   StringRef UniqueDeclName;
11812   LValue IVLVal;
11813   llvm::Function *FoundFn;
11814   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11815       Checker.getFoundData();
11816   if (FoundFn != CGF.CurFn) {
11817     // Special codegen for inner parallel regions.
11818     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11819     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11820     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11821            "Lastprivate conditional is not found in outer region.");
11822     QualType StructTy = std::get<0>(It->getSecond());
11823     const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11824     LValue PrivLVal = CGF.EmitLValue(FoundE);
11825     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11826         PrivLVal.getAddress(CGF),
11827         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11828         CGF.ConvertTypeForMem(StructTy));
11829     LValue BaseLVal =
11830         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11831     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11832     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11833                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11834                         FiredLVal, llvm::AtomicOrdering::Unordered,
11835                         /*IsVolatile=*/true, /*isInit=*/false);
11836     return;
11837   }
11838 
11839   // Private address of the lastprivate conditional in the current context.
11840   // priv_a
11841   LValue LVal = CGF.EmitLValue(FoundE);
11842   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11843                                    FoundE->getExprLoc());
11844 }
11845 
11846 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11847     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11848     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11849   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11850     return;
11851   auto Range = llvm::reverse(LastprivateConditionalStack);
11852   auto It = llvm::find_if(
11853       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11854   if (It == Range.end() || It->Fn != CGF.CurFn)
11855     return;
11856   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11857   assert(LPCI != LastprivateConditionalToTypes.end() &&
11858          "Lastprivates must be registered already.");
11859   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11860   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11861   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11862   for (const auto &Pair : It->DeclToUniqueName) {
11863     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11864     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11865       continue;
11866     auto I = LPCI->getSecond().find(Pair.first);
11867     assert(I != LPCI->getSecond().end() &&
11868            "Lastprivate must be registered already.");
11869     // bool Cmp = priv_a.Fired != 0;
11870     LValue BaseLVal = std::get<3>(I->getSecond());
11871     LValue FiredLVal =
11872         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11873     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11874     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11875     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11876     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11877     // if (Cmp) {
11878     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11879     CGF.EmitBlock(ThenBB);
11880     Address Addr = CGF.GetAddrOfLocalVar(VD);
11881     LValue LVal;
11882     if (VD->getType()->isReferenceType())
11883       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11884                                            AlignmentSource::Decl);
11885     else
11886       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11887                                 AlignmentSource::Decl);
11888     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11889                                      D.getBeginLoc());
11890     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11891     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
11892     // }
11893   }
11894 }
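// A hedged sketch of the nesting this function handles: stores in an inner
// region only raise Fired flags, and the enclosing region emits the checks
// above once the inner construct completes (illustrative names):
//
//   int a = 0;
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp parallel  // inner region: a different CurFn
//     { a = i; }            // sets priv_a.Fired = 1 atomically
//     // emitted here: if (priv_a.Fired != 0) <IV-guarded update of last_a>
//   }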
11895 
11896 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11897     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11898     SourceLocation Loc) {
11899   if (CGF.getLangOpts().OpenMP < 50)
11900     return;
11901   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11902   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11903          "Unknown lastprivate conditional variable.");
11904   StringRef UniqueName = It->second;
11905   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11906   // The variable was not updated in the region - exit.
11907   // The variable was not updated in the region; exit.
11908     return;
11909   LValue LPLVal = CGF.MakeAddrLValue(
11910       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
11911       PrivLVal.getType().getNonReferenceType());
11912   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11913   CGF.EmitStoreOfScalar(Res, PrivLVal);
11914 }
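// A hedged sketch of the copy-back emitted for each lastprivate(conditional:)
// variable at the end of the construct (illustrative names):
//
//   a = last_a;  // emitted only if the named global 'last_a' exists, i.e.
//                // only if some store to 'a' was seen inside the region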
11915 
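// The CGOpenMPSIMDRuntime overrides below all trap: under -fopenmp-simd only
// the simd semantics of directives are honored, so constructs that need the
// host runtime must never reach codegen. A hedged illustration (not from
// this file):
//
//   #pragma omp simd              // honored in SIMD-only mode
//   for (int i = 0; i < n; ++i)
//     ...
//
//   #pragma omp parallel          // parallel semantics are ignored in
//   { ... }                       // SIMD-only mode, so emitParallelCall
//                                 // below is never invoked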
11916 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11917     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11918     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11919     const RegionCodeGenTy &CodeGen) {
11920   llvm_unreachable("Not supported in SIMD-only mode");
11921 }
11922 
11923 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11924     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11925     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11926     const RegionCodeGenTy &CodeGen) {
11927   llvm_unreachable("Not supported in SIMD-only mode");
11928 }
11929 
11930 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11931     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11932     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11933     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11934     bool Tied, unsigned &NumberOfParts) {
11935   llvm_unreachable("Not supported in SIMD-only mode");
11936 }
11937 
11938 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11939                                            SourceLocation Loc,
11940                                            llvm::Function *OutlinedFn,
11941                                            ArrayRef<llvm::Value *> CapturedVars,
11942                                            const Expr *IfCond,
11943                                            llvm::Value *NumThreads) {
11944   llvm_unreachable("Not supported in SIMD-only mode");
11945 }
11946 
11947 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11948     CodeGenFunction &CGF, StringRef CriticalName,
11949     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11950     const Expr *Hint) {
11951   llvm_unreachable("Not supported in SIMD-only mode");
11952 }
11953 
11954 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11955                                            const RegionCodeGenTy &MasterOpGen,
11956                                            SourceLocation Loc) {
11957   llvm_unreachable("Not supported in SIMD-only mode");
11958 }
11959 
11960 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11961                                            const RegionCodeGenTy &MasterOpGen,
11962                                            SourceLocation Loc,
11963                                            const Expr *Filter) {
11964   llvm_unreachable("Not supported in SIMD-only mode");
11965 }
11966 
11967 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11968                                             SourceLocation Loc) {
11969   llvm_unreachable("Not supported in SIMD-only mode");
11970 }
11971 
11972 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11973     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11974     SourceLocation Loc) {
11975   llvm_unreachable("Not supported in SIMD-only mode");
11976 }
11977 
11978 void CGOpenMPSIMDRuntime::emitSingleRegion(
11979     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11980     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11981     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11982     ArrayRef<const Expr *> AssignmentOps) {
11983   llvm_unreachable("Not supported in SIMD-only mode");
11984 }
11985 
11986 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11987                                             const RegionCodeGenTy &OrderedOpGen,
11988                                             SourceLocation Loc,
11989                                             bool IsThreads) {
11990   llvm_unreachable("Not supported in SIMD-only mode");
11991 }
11992 
11993 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11994                                           SourceLocation Loc,
11995                                           OpenMPDirectiveKind Kind,
11996                                           bool EmitChecks,
11997                                           bool ForceSimpleCall) {
11998   llvm_unreachable("Not supported in SIMD-only mode");
11999 }
12000 
12001 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12002     CodeGenFunction &CGF, SourceLocation Loc,
12003     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12004     bool Ordered, const DispatchRTInput &DispatchValues) {
12005   llvm_unreachable("Not supported in SIMD-only mode");
12006 }
12007 
12008 void CGOpenMPSIMDRuntime::emitForStaticInit(
12009     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12010     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12011   llvm_unreachable("Not supported in SIMD-only mode");
12012 }
12013 
12014 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12015     CodeGenFunction &CGF, SourceLocation Loc,
12016     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12017   llvm_unreachable("Not supported in SIMD-only mode");
12018 }
12019 
12020 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12021                                                      SourceLocation Loc,
12022                                                      unsigned IVSize,
12023                                                      bool IVSigned) {
12024   llvm_unreachable("Not supported in SIMD-only mode");
12025 }
12026 
12027 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12028                                               SourceLocation Loc,
12029                                               OpenMPDirectiveKind DKind) {
12030   llvm_unreachable("Not supported in SIMD-only mode");
12031 }
12032 
12033 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12034                                               SourceLocation Loc,
12035                                               unsigned IVSize, bool IVSigned,
12036                                               Address IL, Address LB,
12037                                               Address UB, Address ST) {
12038   llvm_unreachable("Not supported in SIMD-only mode");
12039 }
12040 
12041 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12042                                                llvm::Value *NumThreads,
12043                                                SourceLocation Loc) {
12044   llvm_unreachable("Not supported in SIMD-only mode");
12045 }
12046 
12047 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12048                                              ProcBindKind ProcBind,
12049                                              SourceLocation Loc) {
12050   llvm_unreachable("Not supported in SIMD-only mode");
12051 }
12052 
12053 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12054                                                     const VarDecl *VD,
12055                                                     Address VDAddr,
12056                                                     SourceLocation Loc) {
12057   llvm_unreachable("Not supported in SIMD-only mode");
12058 }
12059 
12060 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12061     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12062     CodeGenFunction *CGF) {
12063   llvm_unreachable("Not supported in SIMD-only mode");
12064 }
12065 
12066 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12067     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12068   llvm_unreachable("Not supported in SIMD-only mode");
12069 }
12070 
12071 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12072                                     ArrayRef<const Expr *> Vars,
12073                                     SourceLocation Loc,
12074                                     llvm::AtomicOrdering AO) {
12075   llvm_unreachable("Not supported in SIMD-only mode");
12076 }
12077 
12078 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12079                                        const OMPExecutableDirective &D,
12080                                        llvm::Function *TaskFunction,
12081                                        QualType SharedsTy, Address Shareds,
12082                                        const Expr *IfCond,
12083                                        const OMPTaskDataTy &Data) {
12084   llvm_unreachable("Not supported in SIMD-only mode");
12085 }
12086 
12087 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12088     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12089     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12090     const Expr *IfCond, const OMPTaskDataTy &Data) {
12091   llvm_unreachable("Not supported in SIMD-only mode");
12092 }
12093 
12094 void CGOpenMPSIMDRuntime::emitReduction(
12095     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12096     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12097     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12098   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12099   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12100                                  ReductionOps, Options);
12101 }
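// A hedged example of the only reduction shape expected here: a simd
// reduction, which is combined locally with no runtime call (illustrative
// names):
//
//   double sum = 0.0;
//   #pragma omp simd reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += x[i];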
12102 
12103 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12104     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12105     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12106   llvm_unreachable("Not supported in SIMD-only mode");
12107 }
12108 
12109 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12110                                                 SourceLocation Loc,
12111                                                 bool IsWorksharingReduction) {
12112   llvm_unreachable("Not supported in SIMD-only mode");
12113 }
12114 
12115 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12116                                                   SourceLocation Loc,
12117                                                   ReductionCodeGen &RCG,
12118                                                   unsigned N) {
12119   llvm_unreachable("Not supported in SIMD-only mode");
12120 }
12121 
12122 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12123                                                   SourceLocation Loc,
12124                                                   llvm::Value *ReductionsPtr,
12125                                                   LValue SharedLVal) {
12126   llvm_unreachable("Not supported in SIMD-only mode");
12127 }
12128 
12129 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12130                                            SourceLocation Loc,
12131                                            const OMPTaskDataTy &Data) {
12132   llvm_unreachable("Not supported in SIMD-only mode");
12133 }
12134 
12135 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12136     CodeGenFunction &CGF, SourceLocation Loc,
12137     OpenMPDirectiveKind CancelRegion) {
12138   llvm_unreachable("Not supported in SIMD-only mode");
12139 }
12140 
12141 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12142                                          SourceLocation Loc, const Expr *IfCond,
12143                                          OpenMPDirectiveKind CancelRegion) {
12144   llvm_unreachable("Not supported in SIMD-only mode");
12145 }
12146 
12147 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12148     const OMPExecutableDirective &D, StringRef ParentName,
12149     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12150     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12151   llvm_unreachable("Not supported in SIMD-only mode");
12152 }
12153 
12154 void CGOpenMPSIMDRuntime::emitTargetCall(
12155     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12156     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12157     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12158     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12159                                      const OMPLoopDirective &D)>
12160         SizeEmitter) {
12161   llvm_unreachable("Not supported in SIMD-only mode");
12162 }
12163 
12164 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12165   llvm_unreachable("Not supported in SIMD-only mode");
12166 }
12167 
12168 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12169   llvm_unreachable("Not supported in SIMD-only mode");
12170 }
12171 
12172 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12173   return false;
12174 }
12175 
12176 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12177                                         const OMPExecutableDirective &D,
12178                                         SourceLocation Loc,
12179                                         llvm::Function *OutlinedFn,
12180                                         ArrayRef<llvm::Value *> CapturedVars) {
12181   llvm_unreachable("Not supported in SIMD-only mode");
12182 }
12183 
12184 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12185                                              const Expr *NumTeams,
12186                                              const Expr *ThreadLimit,
12187                                              SourceLocation Loc) {
12188   llvm_unreachable("Not supported in SIMD-only mode");
12189 }
12190 
12191 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12192     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12193     const Expr *Device, const RegionCodeGenTy &CodeGen,
12194     CGOpenMPRuntime::TargetDataInfo &Info) {
12195   llvm_unreachable("Not supported in SIMD-only mode");
12196 }
12197 
12198 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12199     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12200     const Expr *Device) {
12201   llvm_unreachable("Not supported in SIMD-only mode");
12202 }
12203 
12204 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12205                                            const OMPLoopDirective &D,
12206                                            ArrayRef<Expr *> NumIterations) {
12207   llvm_unreachable("Not supported in SIMD-only mode");
12208 }
12209 
12210 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12211                                               const OMPDependClause *C) {
12212   llvm_unreachable("Not supported in SIMD-only mode");
12213 }
12214 
12215 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12216                                               const OMPDoacrossClause *C) {
12217   llvm_unreachable("Not supported in SIMD-only mode");
12218 }
12219 
12220 const VarDecl *
12221 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12222                                         const VarDecl *NativeParam) const {
12223   llvm_unreachable("Not supported in SIMD-only mode");
12224 }
12225 
12226 Address
12227 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12228                                          const VarDecl *NativeParam,
12229                                          const VarDecl *TargetParam) const {
12230   llvm_unreachable("Not supported in SIMD-only mode");
12231 }
12232