xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "ABIInfoImpl.h"
15 #include "CGCXXABI.h"
16 #include "CGCleanup.h"
17 #include "CGDebugInfo.h"
18 #include "CGRecordLayout.h"
19 #include "CodeGenFunction.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/APValue.h"
22 #include "clang/AST/Attr.h"
23 #include "clang/AST/Decl.h"
24 #include "clang/AST/OpenMPClause.h"
25 #include "clang/AST/StmtOpenMP.h"
26 #include "clang/AST/StmtVisitor.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <cstdint>
43 #include <numeric>
44 #include <optional>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that have an associated captured statement
  /// (outlined constructs).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without a captured statement (inlined
  /// constructs).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; the base implementation is
  /// a no-op (only untied task regions need one).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited early via an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
112 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function of this region.
  StringRef HelperName;
};
145 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the switch-based resume machinery for
  /// untied tasks: a switch over the task's part id is emitted on entry, and
  /// each scheduling point adds a new case so the task body can be re-entered
  /// at the point where it previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter through which the runtime passes the current part id.
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: unknown part id means the task is done.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one scheduling point: persist the next part id, run the caller's
    /// yield codegen, return to the runtime, and register the resume block as
    /// a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the id of the next part so a re-invocation of the task entry
        // resumes right after this point.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
234 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing outlined region's info
/// (if any), since an inlined region shares its parent's captured context.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  // NOTE(review): unlike the other delegating accessors, this one goes through
  // getOldCSI() (any CGCapturedStmtInfo), not OuterRegionInfo — so it also
  // works when the enclosing info is not an OpenMP region. Confirm this
  // asymmetry is intentional.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if the outer captured
  /// statement info is not an OpenMP region (or absent).
  CGOpenMPRegionInfo *OuterRegionInfo;
};
317 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name of the target region.
  StringRef HelperName;
};
346 
/// Placeholder RegionCodeGenTy callback for expression-capture regions, which
/// must never emit a statement body; reaching it indicates a codegen bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are directly usable; only non-local variables
      // need to be redirected through the private scope.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured variable and map the variable to
      // that reference's address inside the private scope.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
408 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as the CGF's CapturedStmtInfo for the lifetime of
/// the object, optionally clearing lambda/block capture state so the inlined
/// region does not inherit it; everything is restored in the destructor.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, swapped out when NoInheritance is set.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the current lambda/block capture maps and null them out so the
      // inlined region starts with a clean slate.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
521 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
553 
554 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
555 /// region.
556 class CleanupTy final : public EHScopeStack::Cleanup {
557   PrePostActionTy *Action;
558 
559 public:
560   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
561   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
562     if (!CGF.HaveInsertPoint())
563       return;
564     Action->Exit(CGF);
565   }
566 };
567 
568 } // anonymous namespace
569 
/// Run the stored code-generation callback inside its own cleanups scope.
/// When a pre/post action is attached, its Exit() is registered as a
/// normal-and-EH cleanup first, so it fires even if the region body exits
/// exceptionally; otherwise a throwaway action is passed to the callback.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
580 
581 /// Check if the combiner is a call to UDR combiner and if it is so return the
582 /// UDR decl used for reduction.
583 static const OMPDeclareReductionDecl *
584 getReductionInit(const Expr *ReductionOp) {
585   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
586     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
587       if (const auto *DRE =
588               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
589         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
590           return DRD;
591   return nullptr;
592 }
593 
/// Emit initialization of the \p Private reduction copy from \p Original.
/// If \p DRD has an explicit 'initializer' clause, the UDR init expression is
/// evaluated with its LHS/RHS placeholder variables privatized to \p Private
/// and \p Original respectively; otherwise \p Private is filled from a
/// module-level null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Call the UDR initializer function (second element of the pair) through
    // the opaque callee of InitOp.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the placeholder variables to the actual private/original
    // storage before evaluating the init expression.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a zero value of Ty in a private
    // global and copy it into the private reduction storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly from the global; no RValue round-trip.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
647 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
///        user-defined reduction initializer instead of \p Init.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null; when present, the
///        source array is walked in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current element pointers; second incoming edge is added
  // at the bottom of the loop after the increment.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR name "omp.arraycpy.dest.element" is reused for the
    // *source* GEP here; cosmetic only (IR value names carry no semantics),
    // but "src" would be clearer — confirm before renaming upstream.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
736 
/// Emit the LValue of the shared (original) variable for a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
740 
741 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
742                                             const Expr *E) {
743   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
744     return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
745   return LValue();
746 }
747 
/// Emit initialization of an array-typed reduction private copy.
/// Chooses the declare-reduction initializer (the ReductionOp expression)
/// when \p DRD is present and either provides an explicit initializer or the
/// private VarDecl has no initializer of its own; otherwise the private
/// variable's own initializer is used.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
764 
765 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
766                                    ArrayRef<const Expr *> Origs,
767                                    ArrayRef<const Expr *> Privates,
768                                    ArrayRef<const Expr *> ReductionOps) {
769   ClausesData.reserve(Shareds.size());
770   SharedAddresses.reserve(Shareds.size());
771   Sizes.reserve(Shareds.size());
772   BaseDecls.reserve(Shareds.size());
773   const auto *IOrig = Origs.begin();
774   const auto *IPriv = Privates.begin();
775   const auto *IRed = ReductionOps.begin();
776   for (const Expr *Ref : Shareds) {
777     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
778     std::advance(IOrig, 1);
779     std::advance(IPriv, 1);
780     std::advance(IRed, 1);
781   }
782 }
783 
784 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
785   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
786          "Number of generated lvalues must be exactly N.");
787   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
788   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
789   SharedAddresses.emplace_back(First, Second);
790   if (ClausesData[N].Shared == ClausesData[N].Ref) {
791     OrigAddresses.emplace_back(First, Second);
792   } else {
793     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
794     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
795     OrigAddresses.emplace_back(First, Second);
796   }
797 }
798 
/// Compute and record the size of reduction item \p N in Sizes.
/// For non-variably-modified types only the byte size is recorded (element
/// count is null). For VLA-like types both the byte size and the element
/// count are computed — from the array-section bounds when the clause
/// expression is a section — and the VLA size expression is bound so the
/// private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size is known statically, no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, then scale by sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the computed element count so that
  // EmitVariablyModifiedType can emit the private type below.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
832 
/// Re-emit the variably modified private type of item \p N using an
/// externally provided element count \p Size. A no-op for non-VLA types, in
/// which case \p Size (and the recorded element count) must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
849 
/// Emit initialization of the private copy for reduction item \p N.
/// \param DefaultInit Callback that emits default setup code; its boolean
/// result reports whether it already performed the initialization itself.
/// Three cases, in order:
///  * array types: element-wise aggregate initialization (running
///    DefaultInit first when a UDR initializer will be used);
///  * user-defined reduction applicable to this item: run DefaultInit, then
///    the UDR initializer;
///  * otherwise: emit the private VarDecl's own initializer, unless
///    DefaultInit handled it or the initializer is trivial.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
874 
875 bool ReductionCodeGen::needCleanups(unsigned N) {
876   QualType PrivateType = getPrivateType(N);
877   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
878   return DTorKind != QualType::DK_none;
879 }
880 
881 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
882                                     Address PrivateAddr) {
883   QualType PrivateType = getPrivateType(N);
884   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
885   if (needCleanups(N)) {
886     PrivateAddr =
887         PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
888     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
889   }
890 }
891 
/// Peel pointer/reference levels off \p BaseLV, loading through each level,
/// until \p BaseTy matches \p ElTy, then return an lvalue for that storage
/// retyped to \p ElTy's IR type (preserving base info and TBAA).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one indirection level: pointers and references are loaded
    // through slightly different lvalue paths.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
910 
/// Rebuild the indirection chain of \p BaseTy around the adjusted pointer
/// \p Addr: for each pointer/reference level a temporary is created and each
/// outer temporary stores the address of the next inner one, so that loading
/// through the outermost temporary ultimately reaches \p Addr. When there is
/// no indirection, \p Addr simply replaces the pointer of
/// \p OriginalBaseAddress.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Chain: each previously created temporary points at the new one;
    // remember the outermost temporary to return it.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection: reuse the original base address with the new pointer.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
939 
940 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
941   const VarDecl *OrigVD = nullptr;
942   if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
943     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
944     while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
945       Base = TempOASE->getBase()->IgnoreParenImpCasts();
946     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
947       Base = TempASE->getBase()->IgnoreParenImpCasts();
948     DE = cast<DeclRefExpr>(Base);
949     OrigVD = cast<VarDecl>(DE->getDecl());
950   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
951     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
952     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
953       Base = TempASE->getBase()->IgnoreParenImpCasts();
954     DE = cast<DeclRefExpr>(Base);
955     OrigVD = cast<VarDecl>(DE->getDecl());
956   }
957   return OrigVD;
958 }
959 
/// Adjust the private copy's address for item \p N when the reduction item
/// is an array section/subscript of a larger variable: offset the private
/// pointer by the same element distance the shared storage has from its base
/// and rebuild the base's indirection chain around it. For plain variable
/// items the address is returned unchanged. Records the base VarDecl in
/// BaseDecls either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Element distance between the variable's begin and the shared item.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    // Apply the same offset to the private storage.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
986 
987 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
988   const OMPDeclareReductionDecl *DRD =
989       getReductionInit(ClausesData[N].ReductionOp);
990   return DRD && DRD->getInitializer();
991 }
992 
/// Load the thread-id argument (declared with pointer type) and form an
/// lvalue for the pointed-to thread-id value.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
998 
/// Emit the body of an OpenMP region inside a terminate scope so exceptions
/// cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1013 
/// For task outlined regions the thread-id variable is not a pointer (see
/// the assertion in emitTaskOutlinedFunction), so the local variable's
/// address itself is the lvalue — no pointer load is needed.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1020 
1021 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1022                                        QualType FieldTy) {
1023   auto *Field = FieldDecl::Create(
1024       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1025       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1026       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1027   Field->setAccess(AS_public);
1028   DC->addDecl(Field);
1029   return Field;
1030 }
1031 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // Critical-section name type: an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  // Initialize the builder and, when compiling for a target device, load the
  // host IR's offload metadata before installing the config.
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1053 
1054 void CGOpenMPRuntime::clear() {
1055   InternalVars.clear();
1056   // Clean non-target variable declarations possibly used only in debug info.
1057   for (const auto &Data : EmittedNonTargetVariables) {
1058     if (!Data.getValue().pointsToAliveValue())
1059       continue;
1060     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1061     if (!GV)
1062       continue;
1063     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1064       continue;
1065     GV->eraseFromParent();
1066   }
1067 }
1068 
/// Concatenate \p Parts into a platform-specific runtime symbol name via the
/// OpenMPIRBuilder.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
1072 
/// Emit the outlined combiner or initializer helper of a user-defined
/// reduction as an internal function taking two restrict-qualified Ty*
/// parameters (the "omp_out" parameter first, then "omp_in"). The UDR's
/// \p In / \p Out variables are privatized to the loaded parameter addresses
/// before the combiner/initializer expression is emitted, so references to
/// them inside \p CombinerInitializer resolve to the parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimizations on, make the helper eligible for inlining.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // For initializers, first run the "omp_priv" variable's own initializer
  // (if any and non-trivial), then the explicit initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1127 
/// Emit (at most once) the combiner and, if present, initializer functions
/// for the declare-reduction \p D and cache them in UDRMap. When called from
/// within a function, \p D is also recorded against that function in
/// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer is passed as an expression; otherwise
    // the helper relies on the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}
1151 
1152 std::pair<llvm::Function *, llvm::Function *>
1153 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1154   auto I = UDRMap.find(D);
1155   if (I != UDRMap.end())
1156     return I->second;
1157   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1158   return UDRMap.lookup(D);
1159 }
1160 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present. The constructor pushes a finalization callback
// for the region; the destructor pops it again.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    // Without an OpenMPIRBuilder there is nothing to push/pop.
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    // Pop the finalization callback pushed in the constructor.
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Non-owning; null when no OpenMPIRBuilder is in use.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1206 
/// Shared implementation for emitting the outlined function of a parallel or
/// teams region: determines whether the (combined) directive may be
/// cancelled, installs the region info and generates the captured-statement
/// function named with \p OutlinedHelperName.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // hasCancel() is declared per directive class, so each cancellable
  // parallel variant must be checked explicitly.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1243 
1244 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1245   std::string Suffix = getName({"omp_outlined"});
1246   return (Name + Suffix).str();
1247 }
1248 
/// Convenience overload: derive the helper name from the function currently
/// being emitted.
std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}
1252 
1253 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1254   std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1255   return (Name + Suffix).str();
1256 }
1257 
/// Emit the outlined function for a parallel region by delegating to the
/// shared parallel/teams implementation with the OMPD_parallel captured
/// statement.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
1267 
/// Emit the outlined function for a teams region by delegating to the shared
/// parallel/teams implementation with the OMPD_teams captured statement.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
1277 
/// Emit the outlined function for a task (or taskloop) region. For untied
/// tasks, a re-schedule action is installed that calls __kmpc_omp_task with
/// the task descriptor; \p NumberOfParts is then set to the number of task
/// parts the action produced.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, emit "__kmpc_omp_task(loc, tid, task_t*)" to
  // re-enqueue the task at each scheduling point.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // hasCancel() is declared per directive class; check each cancellable
  // task variant explicitly.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1324 
/// Create the per-function service insertion point: a dead bitcast of an
/// undef i32 that acts only as an anchor where location/thread-id setup code
/// is later inserted. Placed at the current insertion point when
/// \p AtCurrentPoint, otherwise right after the function's alloca insert
/// point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}
1339 
1340 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1341   auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1342   if (Elem.ServiceInsertPt) {
1343     llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1344     Elem.ServiceInsertPt = nullptr;
1345     Ptr->eraseFromParent();
1346   }
1347 }
1348 
/// Build the ident location string ";file;function;line;column;;" for
/// \p Loc into \p Buffer and return a view of it. The file name is remapped
/// through debug info when debug info is enabled.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1366 
/// Emit (or reuse) an ident_t* describing \p Loc with the given \p Flags.
/// Falls back to the default source-location string when the location is
/// invalid, or when no debug info is requested and \p EmitLoc is false.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // Remap the file name through debug info when available.
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1395 
/// Return the current OpenMP global thread id (gtid) for use inside \p CGF.
/// The value is obtained, in order of preference, from: the OpenMPIRBuilder
/// (when enabled), a per-function cache, the outlined region's thread-id
/// argument, or a freshly emitted __kmpc_global_thread_num() call whose
/// result is cached for the rest of the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load the thread-id argument directly when it is safe w.r.t. EH:
      // either no landing pads are required, or the load happens in (or its
      // pointer is defined in) a block where the value is known to dominate
      // the use.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the dedicated service insertion point so the cached id
  // is available throughout the function; restore the builder afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
1463 
1464 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1465   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1466   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1467     clearLocThreadIdInsertPt(CGF);
1468     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1469   }
1470   if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1471     for (const auto *D : I->second)
1472       UDRMap.erase(D);
1473     FunctionUDRMap.erase(I);
1474   }
1475   if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1476     for (const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
/// Return the pointer-to-ident_t LLVM type used in OpenMP runtime calls; the
/// type itself is owned by the shared OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1487 
1488 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1489 convertDeviceClause(const VarDecl *VD) {
1490   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1491       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1492   if (!DevTy)
1493     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1494 
1495   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1496   case OMPDeclareTargetDeclAttr::DT_Host:
1497     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1498     break;
1499   case OMPDeclareTargetDeclAttr::DT_NoHost:
1500     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1501     break;
1502   case OMPDeclareTargetDeclAttr::DT_Any:
1503     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1504     break;
1505   default:
1506     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1507     break;
1508   }
1509 }
1510 
1511 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1512 convertCaptureClause(const VarDecl *VD) {
1513   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1514       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1515   if (!MapType)
1516     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1517   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1518   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1519     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1520     break;
1521   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1522     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1523     break;
1524   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1525     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1526     break;
1527   default:
1528     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1529     break;
1530   }
1531 }
1532 
1533 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1534     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1535     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1536 
1537   auto FileInfoCallBack = [&]() {
1538     SourceManager &SM = CGM.getContext().getSourceManager();
1539     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1540 
1541     llvm::sys::fs::UniqueID ID;
1542     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1543       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1544     }
1545 
1546     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1547   };
1548 
1549   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1550 }
1551 
1552 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1553   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1554 
1555   auto LinkageForVariable = [&VD, this]() {
1556     return CGM.getLLVMLinkageVarDefinition(VD);
1557   };
1558 
1559   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1560 
1561   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1562       CGM.getContext().getPointerType(VD->getType()));
1563   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1564       convertCaptureClause(VD), convertDeviceClause(VD),
1565       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1566       VD->isExternallyVisible(),
1567       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1568                                   VD->getCanonicalDecl()->getBeginLoc()),
1569       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1570       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1571       LinkageForVariable);
1572 
1573   if (!addr)
1574     return ConstantAddress::invalid();
1575   return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1576 }
1577 
1578 llvm::Constant *
1579 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1580   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1581          !CGM.getContext().getTargetInfo().isTLSSupported());
1582   // Lookup the entry, lazily creating it if necessary.
1583   std::string Suffix = getName({"cache", ""});
1584   return OMPBuilder.getOrCreateInternalVariable(
1585       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1586 }
1587 
1588 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1589                                                 const VarDecl *VD,
1590                                                 Address VDAddr,
1591                                                 SourceLocation Loc) {
1592   if (CGM.getLangOpts().OpenMPUseTLS &&
1593       CGM.getContext().getTargetInfo().isTLSSupported())
1594     return VDAddr;
1595 
1596   llvm::Type *VarTy = VDAddr.getElementType();
1597   llvm::Value *Args[] = {
1598       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1599       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1600       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1601       getOrCreateThreadPrivateCache(VD)};
1602   return Address(
1603       CGF.EmitRuntimeCall(
1604           OMPBuilder.getOrCreateRuntimeFunction(
1605               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1606           Args),
1607       CGF.Int8Ty, VDAddr.getAlignment());
1608 }
1609 
1610 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1611     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1612     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1613   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1614   // library.
1615   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1616   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1617                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1618                       OMPLoc);
1619   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1620   // to register constructor/destructor for variable.
1621   llvm::Value *Args[] = {
1622       OMPLoc,
1623       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1624       Ctor, CopyCtor, Dtor};
1625   CGF.EmitRuntimeCall(
1626       OMPBuilder.getOrCreateRuntimeFunction(
1627           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1628       Args);
1629 }
1630 
/// Emit (once per mangled name) the ctor/dtor helper functions for the
/// threadprivate variable \p VD and register them with the runtime. When no
/// \p CGF is given, a standalone "__omp_threadprivate_init_" function is
/// created and returned so a global initializer can invoke the registration;
/// otherwise registration is emitted inline into \p CGF and nullptr is
/// returned. Returns nullptr as well when native TLS is used or when neither
/// a ctor nor a dtor is required.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only emit the helpers once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes a single void* argument: the address of the
      // thread-local copy to construct.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor also takes the address of the thread-local copy as void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a standalone init
      // function that global-initialization machinery can call.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1740 
1741 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1742                                                 llvm::GlobalValue *GV) {
1743   std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1744       OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1745 
1746   // We only need to handle active 'indirect' declare target functions.
1747   if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1748     return;
1749 
1750   // Get a mangled name to store the new device global in.
1751   llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1752       CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1753   SmallString<128> Name;
1754   OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1755 
1756   // We need to generate a new global to hold the address of the indirectly
1757   // called device function. Doing this allows us to keep the visibility and
1758   // linkage of the associated function unchanged while allowing the runtime to
1759   // access its value.
1760   llvm::GlobalValue *Addr = GV;
1761   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1762     Addr = new llvm::GlobalVariable(
1763         CGM.getModule(), CGM.VoidPtrTy,
1764         /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1765         nullptr, llvm::GlobalValue::NotThreadLocal,
1766         CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1767     Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1768   }
1769 
1770   OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1771       Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1772       llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1773       llvm::GlobalValue::WeakODRLinkage);
1774 }
1775 
1776 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1777                                                           QualType VarType,
1778                                                           StringRef Name) {
1779   std::string Suffix = getName({"artificial", ""});
1780   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1781   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1782       VarLVType, Twine(Name).concat(Suffix).str());
1783   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1784       CGM.getTarget().isTLSSupported()) {
1785     GAddr->setThreadLocal(/*Val=*/true);
1786     return Address(GAddr, GAddr->getValueType(),
1787                    CGM.getContext().getTypeAlignInChars(VarType));
1788   }
1789   std::string CacheSuffix = getName({"cache", ""});
1790   llvm::Value *Args[] = {
1791       emitUpdateLocation(CGF, SourceLocation()),
1792       getThreadID(CGF, SourceLocation()),
1793       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1794       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1795                                 /*isSigned=*/false),
1796       OMPBuilder.getOrCreateInternalVariable(
1797           CGM.VoidPtrPtrTy,
1798           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1799   return Address(
1800       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1801           CGF.EmitRuntimeCall(
1802               OMPBuilder.getOrCreateRuntimeFunction(
1803                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1804               Args),
1805           CGF.Builder.getPtrTy(0)),
1806       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1807 }
1808 
1809 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1810                                    const RegionCodeGenTy &ThenGen,
1811                                    const RegionCodeGenTy &ElseGen) {
1812   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1813 
1814   // If the condition constant folds and can be elided, try to avoid emitting
1815   // the condition and the dead arm of the if/else.
1816   bool CondConstant;
1817   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1818     if (CondConstant)
1819       ThenGen(CGF);
1820     else
1821       ElseGen(CGF);
1822     return;
1823   }
1824 
1825   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1826   // emit the conditional branch.
1827   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1828   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1829   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1830   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1831 
1832   // Emit the 'then' code.
1833   CGF.EmitBlock(ThenBlock);
1834   ThenGen(CGF);
1835   CGF.EmitBranch(ContBlock);
1836   // Emit the 'else' code if present.
1837   // There is no need to emit line number for unconditional branch.
1838   (void)ApplyDebugLocation::CreateEmpty(CGF);
1839   CGF.EmitBlock(ElseBlock);
1840   ElseGen(CGF);
1841   // There is no need to emit line number for unconditional branch.
1842   (void)ApplyDebugLocation::CreateEmpty(CGF);
1843   CGF.EmitBranch(ContBlock);
1844   // Emit the continuation block for code after the if.
1845   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1846 }
1847 
/// Emit the call sequence for a '#pragma omp parallel' region whose body was
/// outlined into \p OutlinedFn. Without an if-clause (or when it is true)
/// this is __kmpc_fork_call; when the if-clause is false the region is run
/// serialized on the current thread between
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// NOTE(review): NumThreads is unused in this body — presumably handled
/// elsewhere (e.g. a num_threads clause emitted by the caller); confirm.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: fork via the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined body on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
1918 
1919 // If we're inside an (outlined) parallel region, use the region info's
1920 // thread-ID variable (it is passed in a first argument of the outlined function
1921 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1922 // regular serial code region, get thread ID by calling kmp_int32
1923 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1924 // return the address of that temp.
1925 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1926                                              SourceLocation Loc) {
1927   if (auto *OMPRegionInfo =
1928           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1929     if (OMPRegionInfo->getThreadIDVariable())
1930       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1931 
1932   llvm::Value *ThreadID = getThreadID(CGF, Loc);
1933   QualType Int32Ty =
1934       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1935   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1936   CGF.EmitStoreOfScalar(ThreadID,
1937                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1938 
1939   return ThreadIDTemp;
1940 }
1941 
1942 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1943   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1944   std::string Name = getName({Prefix, "var"});
1945   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1946 }
1947 
1948 namespace {
1949 /// Common pre(post)-action for different OpenMP constructs.
1950 class CommonActionTy final : public PrePostActionTy {
1951   llvm::FunctionCallee EnterCallee;
1952   ArrayRef<llvm::Value *> EnterArgs;
1953   llvm::FunctionCallee ExitCallee;
1954   ArrayRef<llvm::Value *> ExitArgs;
1955   bool Conditional;
1956   llvm::BasicBlock *ContBlock = nullptr;
1957 
1958 public:
1959   CommonActionTy(llvm::FunctionCallee EnterCallee,
1960                  ArrayRef<llvm::Value *> EnterArgs,
1961                  llvm::FunctionCallee ExitCallee,
1962                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1963       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1964         ExitArgs(ExitArgs), Conditional(Conditional) {}
1965   void Enter(CodeGenFunction &CGF) override {
1966     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1967     if (Conditional) {
1968       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1969       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1970       ContBlock = CGF.createBasicBlock("omp_if.end");
1971       // Generate the branch (If-stmt)
1972       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1973       CGF.EmitBlock(ThenBlock);
1974     }
1975   }
1976   void Done(CodeGenFunction &CGF) {
1977     // Emit the rest of blocks/branches
1978     CGF.EmitBranch(ContBlock);
1979     CGF.EmitBlock(ContBlock, true);
1980   }
1981   void Exit(CodeGenFunction &CGF) override {
1982     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1983   }
1984 };
1985 } // anonymous namespace
1986 
1987 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1988                                          StringRef CriticalName,
1989                                          const RegionCodeGenTy &CriticalOpGen,
1990                                          SourceLocation Loc, const Expr *Hint) {
1991   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1992   // CriticalOpGen();
1993   // __kmpc_end_critical(ident_t *, gtid, Lock);
1994   // Prepare arguments and build a call to __kmpc_critical
1995   if (!CGF.HaveInsertPoint())
1996     return;
1997   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1998                          getCriticalRegionLock(CriticalName)};
1999   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2000                                                 std::end(Args));
2001   if (Hint) {
2002     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2003         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2004   }
2005   CommonActionTy Action(
2006       OMPBuilder.getOrCreateRuntimeFunction(
2007           CGM.getModule(),
2008           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2009       EnterArgs,
2010       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2011                                             OMPRTL___kmpc_end_critical),
2012       Args);
2013   CriticalOpGen.setAction(Action);
2014   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2015 }
2016 
2017 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2018                                        const RegionCodeGenTy &MasterOpGen,
2019                                        SourceLocation Loc) {
2020   if (!CGF.HaveInsertPoint())
2021     return;
2022   // if(__kmpc_master(ident_t *, gtid)) {
2023   //   MasterOpGen();
2024   //   __kmpc_end_master(ident_t *, gtid);
2025   // }
2026   // Prepare arguments and build a call to __kmpc_master
2027   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2028   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2029                             CGM.getModule(), OMPRTL___kmpc_master),
2030                         Args,
2031                         OMPBuilder.getOrCreateRuntimeFunction(
2032                             CGM.getModule(), OMPRTL___kmpc_end_master),
2033                         Args,
2034                         /*Conditional=*/true);
2035   MasterOpGen.setAction(Action);
2036   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2037   Action.Done(CGF);
2038 }
2039 
2040 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2041                                        const RegionCodeGenTy &MaskedOpGen,
2042                                        SourceLocation Loc, const Expr *Filter) {
2043   if (!CGF.HaveInsertPoint())
2044     return;
2045   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2046   //   MaskedOpGen();
2047   //   __kmpc_end_masked(iden_t *, gtid);
2048   // }
2049   // Prepare arguments and build a call to __kmpc_masked
2050   llvm::Value *FilterVal = Filter
2051                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2052                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2053   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2054                          FilterVal};
2055   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2056                             getThreadID(CGF, Loc)};
2057   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2058                             CGM.getModule(), OMPRTL___kmpc_masked),
2059                         Args,
2060                         OMPBuilder.getOrCreateRuntimeFunction(
2061                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2062                         ArgsEnd,
2063                         /*Conditional=*/true);
2064   MaskedOpGen.setAction(Action);
2065   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2066   Action.Done(CGF);
2067 }
2068 
// Emit a taskyield scheduling point, either through the OpenMPIRBuilder or
// as a direct call to __kmpc_omp_taskyield(loc, gtid, 0).
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // If we are inside an OpenMP region, emit the untied-task resume switch at
  // this scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2088 
// Emit a 'taskgroup' region bracketed by the runtime entry/exit calls:
//   __kmpc_taskgroup(loc, gtid); <body> __kmpc_end_taskgroup(loc, gtid);
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  // Both entry and exit calls take the same (loc, gtid) argument pair; the
  // action is unconditional, so the end-call is always emitted.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2108 
2109 /// Given an array of pointers to variables, project the address of a
2110 /// given variable.
2111 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2112                                       unsigned Index, const VarDecl *Var) {
2113   // Pull out the pointer to the variable.
2114   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2115   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2116 
2117   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2118   return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2119 }
2120 
// Synthesize the helper function the runtime invokes to broadcast
// copyprivate values from the thread that executed the single region to the
// other threads. The generated function has the shape
//   void copy_func(void *LHSArg, void *RHSArg)
// where each argument is an array of void* slots, one per copyprivate
// variable, and each slot is copied with the clause's assignment operation.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // Internal linkage: the helper is only referenced through the
  // __kmpc_copyprivate call in this TU.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Copy using the original copyprivate variable's type and the
    // clause-provided assignment operation (handles user-defined copy
    // assignment for class types).
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2176 
// Emit a 'single' region. The region body is guarded by
// __kmpc_single/__kmpc_end_single; when the construct carries copyprivate
// clauses, a did_it flag records which thread executed the region and a
// __kmpc_copyprivate call broadcasts the listed variables to all threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four clause arrays are parallel: element I of each describes the
  // same copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region, so only the thread
    // that executed the single body sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are forwarded in mirrored order into
    // the helper's (DestExprs, SrcExprs) parameters — verify against the
    // caller's argument order in CGStmtOpenMP before changing.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2263 
2264 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2265                                         const RegionCodeGenTy &OrderedOpGen,
2266                                         SourceLocation Loc, bool IsThreads) {
2267   if (!CGF.HaveInsertPoint())
2268     return;
2269   // __kmpc_ordered(ident_t *, gtid);
2270   // OrderedOpGen();
2271   // __kmpc_end_ordered(ident_t *, gtid);
2272   // Prepare arguments and build a call to __kmpc_ordered
2273   if (IsThreads) {
2274     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276                               CGM.getModule(), OMPRTL___kmpc_ordered),
2277                           Args,
2278                           OMPBuilder.getOrCreateRuntimeFunction(
2279                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2280                           Args);
2281     OrderedOpGen.setAction(Action);
2282     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2283     return;
2284   }
2285   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2286 }
2287 
2288 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2289   unsigned Flags;
2290   if (Kind == OMPD_for)
2291     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2292   else if (Kind == OMPD_sections)
2293     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2294   else if (Kind == OMPD_single)
2295     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2296   else if (Kind == OMPD_barrier)
2297     Flags = OMP_IDENT_BARRIER_EXPL;
2298   else
2299     Flags = OMP_IDENT_BARRIER_IMPL;
2300   return Flags;
2301 }
2302 
2303 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2304     CodeGenFunction &CGF, const OMPLoopDirective &S,
2305     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2306   // Check if the loop directive is actually a doacross loop directive. In this
2307   // case choose static, 1 schedule.
2308   if (llvm::any_of(
2309           S.getClausesOfKind<OMPOrderedClause>(),
2310           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2311     ScheduleKind = OMPC_SCHEDULE_static;
2312     // Chunk size is 1 in this case.
2313     llvm::APInt ChunkSize(32, 1);
2314     ChunkExpr = IntegerLiteral::Create(
2315         CGF.getContext(), ChunkSize,
2316         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2317         SourceLocation());
2318   }
2319 }
2320 
// Emit a barrier at the current insertion point. With the OpenMPIRBuilder
// enabled the builder handles everything; otherwise this emits either
// __kmpc_barrier or, inside a cancellable region, __kmpc_cancel_barrier
// followed by an optional cancellation-exit check.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                          EmitChecks));
    CGF.Builder.restoreIP(AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The location's flags encode which construct this barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through cleanups to the region's cancellation destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable (or forced-simple) case: plain barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2372 
// Emit a call for the OpenMP 'error' directive:
//   __kmpc_error(ident_t *loc, int severity, const char *message)
// Severity is 2 (fatal) or 1 (warning); a null message pointer is passed
// when no message clause was provided.
void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}
2388 
2389 /// Map the OpenMP loop schedule to the runtime enumeration.
2390 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2391                                           bool Chunked, bool Ordered) {
2392   switch (ScheduleKind) {
2393   case OMPC_SCHEDULE_static:
2394     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2395                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2396   case OMPC_SCHEDULE_dynamic:
2397     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2398   case OMPC_SCHEDULE_guided:
2399     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2400   case OMPC_SCHEDULE_runtime:
2401     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2402   case OMPC_SCHEDULE_auto:
2403     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2404   case OMPC_SCHEDULE_unknown:
2405     assert(!Chunked && "chunk was specified but schedule kind not known");
2406     return Ordered ? OMP_ord_static : OMP_sch_static;
2407   }
2408   llvm_unreachable("Unexpected runtime schedule");
2409 }
2410 
2411 /// Map the OpenMP distribute schedule to the runtime enumeration.
2412 static OpenMPSchedType
2413 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2414   // only static is allowed for dist_schedule
2415   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2416 }
2417 
2418 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2419                                          bool Chunked) const {
2420   OpenMPSchedType Schedule =
2421       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2422   return Schedule == OMP_sch_static;
2423 }
2424 
2425 bool CGOpenMPRuntime::isStaticNonchunked(
2426     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2427   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2428   return Schedule == OMP_dist_sch_static;
2429 }
2430 
2431 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2432                                       bool Chunked) const {
2433   OpenMPSchedType Schedule =
2434       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2435   return Schedule == OMP_sch_static_chunked;
2436 }
2437 
2438 bool CGOpenMPRuntime::isStaticChunked(
2439     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2440   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2441   return Schedule == OMP_dist_sch_static_chunked;
2442 }
2443 
2444 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2445   OpenMPSchedType Schedule =
2446       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2447   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2448   return Schedule != OMP_sch_static;
2449 }
2450 
2451 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2452                                   OpenMPScheduleClauseModifier M1,
2453                                   OpenMPScheduleClauseModifier M2) {
2454   int Modifier = 0;
2455   switch (M1) {
2456   case OMPC_SCHEDULE_MODIFIER_monotonic:
2457     Modifier = OMP_sch_modifier_monotonic;
2458     break;
2459   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2460     Modifier = OMP_sch_modifier_nonmonotonic;
2461     break;
2462   case OMPC_SCHEDULE_MODIFIER_simd:
2463     if (Schedule == OMP_sch_static_chunked)
2464       Schedule = OMP_sch_static_balanced_chunked;
2465     break;
2466   case OMPC_SCHEDULE_MODIFIER_last:
2467   case OMPC_SCHEDULE_MODIFIER_unknown:
2468     break;
2469   }
2470   switch (M2) {
2471   case OMPC_SCHEDULE_MODIFIER_monotonic:
2472     Modifier = OMP_sch_modifier_monotonic;
2473     break;
2474   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2475     Modifier = OMP_sch_modifier_nonmonotonic;
2476     break;
2477   case OMPC_SCHEDULE_MODIFIER_simd:
2478     if (Schedule == OMP_sch_static_chunked)
2479       Schedule = OMP_sch_static_balanced_chunked;
2480     break;
2481   case OMPC_SCHEDULE_MODIFIER_last:
2482   case OMPC_SCHEDULE_MODIFIER_unknown:
2483     break;
2484   }
2485   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2486   // If the static schedule kind is specified or if the ordered clause is
2487   // specified, and if the nonmonotonic modifier is not specified, the effect is
2488   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2489   // modifier is specified, the effect is as if the nonmonotonic modifier is
2490   // specified.
2491   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2492     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2493           Schedule == OMP_sch_static_balanced_chunked ||
2494           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2495           Schedule == OMP_dist_sch_static_chunked ||
2496           Schedule == OMP_dist_sch_static))
2497       Modifier = OMP_sch_modifier_nonmonotonic;
2498   }
2499   return Schedule | Modifier;
2500 }
2501 
// Emit the __kmpc_dispatch_init_* call that initializes a dynamically
// scheduled worksharing loop with the given bounds and chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through the for_static_init path instead; only
  // ordered loops may reach here with a static schedule.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  // The builder selects the 4/4u/8/8u variant based on IV size/signedness.
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2535 
// Emit the matching __kmpc_dispatch_deinit call that tears down the
// dispatch state set up by emitForDispatchInit.
void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}
2544 
// Shared helper for emitForStaticInit/emitDistributeStaticInit: builds the
// argument list and emits the __kmpc_for_static_init_* call for a static
// (chunked or non-chunked) schedule. Values.IL/LB/UB/ST are in/out pointers
// that the runtime fills with this thread's bounds.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops never use the static-init entry point.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2593 
// Emit the static-schedule initialization for a worksharing loop or
// sections directive; the ident_t flags distinguish loop vs. sections work.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Non-distribute path: request the plain for_static_init variant.
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  // Use an artificial debug location so the runtime call is not attributed
  // to arbitrary user code.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2615 
2616 void CGOpenMPRuntime::emitDistributeStaticInit(
2617     CodeGenFunction &CGF, SourceLocation Loc,
2618     OpenMPDistScheduleClauseKind SchedKind,
2619     const CGOpenMPRuntime::StaticRTInput &Values) {
2620   OpenMPSchedType ScheduleNum =
2621       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2622   llvm::Value *UpdatedLocation =
2623       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2624   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2625   llvm::FunctionCallee StaticInitFunction;
2626   bool isGPUDistribute =
2627       CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2628   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2629       Values.IVSize, Values.IVSigned, isGPUDistribute);
2630 
2631   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2632                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2633                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2634 }
2635 
// Emit the static-schedule finalization call that matches a preceding
// for_static_init: __kmpc_for_static_fini, or the distribute-specific
// variant on GPU offload devices.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  // Flags mirror the ones used for the init call: distribute / loop /
  // sections.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                         : isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
2666 
// Emit the per-iteration dispatch finalization required for ordered
// dynamically scheduled loops.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // The builder picks the variant matching the IV size and signedness.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}
2678 
// Emit a __kmpc_dispatch_next_* call that fetches the next chunk of a
// dynamically scheduled loop into the given out-pointers. Returns the
// runtime's i32 result converted to a boolean value (non-zero means more
// work is available).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  // Narrow the i32 runtime result to the bool expected by the loop exit
  // condition.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2701 
// Emit the call that registers a num_threads clause value with the runtime
// before the corresponding parallel region is started.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  // The runtime expects a signed i32; cast whatever integer width the
  // clause expression produced.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
2715 
// Emit the call that registers a proc_bind clause value with the runtime
// before the corresponding parallel region is started.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  // The proc_bind enum value is passed through as a signed integer.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
2730 
2731 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2732                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2733   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2734     OMPBuilder.createFlush(CGF.Builder);
2735   } else {
2736     if (!CGF.HaveInsertPoint())
2737       return;
2738     // Build call void __kmpc_flush(ident_t *loc)
2739     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2740                             CGM.getModule(), OMPRTL___kmpc_flush),
2741                         emitUpdateLocation(CGF, Loc));
2742   }
2743 }
2744 
namespace {
/// Indexes of fields for type kmp_task_t.
/// These indices are used with std::next(field_begin(), <Index>) and must stay
/// in sync with the field order built by createKmpTaskTRecordDecl below.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2770 
2771 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2772   // If we are in simd mode or there are no entries, we don't need to do
2773   // anything.
2774   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2775     return;
2776 
2777   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2778       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2779              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2780     SourceLocation Loc;
2781     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2782       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2783                 E = CGM.getContext().getSourceManager().fileinfo_end();
2784            I != E; ++I) {
2785         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2786             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2787           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2788               I->getFirst(), EntryInfo.Line, 1);
2789           break;
2790         }
2791       }
2792     }
2793     switch (Kind) {
2794     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2795       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2796           DiagnosticsEngine::Error, "Offloading entry for target region in "
2797                                     "%0 is incorrect: either the "
2798                                     "address or the ID is invalid.");
2799       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2800     } break;
2801     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2802       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2803           DiagnosticsEngine::Error, "Offloading entry for declare target "
2804                                     "variable %0 is incorrect: the "
2805                                     "address is invalid.");
2806       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2807     } break;
2808     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2809       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2810           DiagnosticsEngine::Error,
2811           "Offloading entry for declare target variable is incorrect: the "
2812           "address is invalid.");
2813       CGM.getDiags().Report(DiagID);
2814     } break;
2815     }
2816   };
2817 
2818   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2819 }
2820 
2821 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2822   if (!KmpRoutineEntryPtrTy) {
2823     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2824     ASTContext &C = CGM.getContext();
2825     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2826     FunctionProtoType::ExtProtoInfo EPI;
2827     KmpRoutineEntryPtrQTy = C.getPointerType(
2828         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2829     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2830   }
2831 }
2832 
namespace {
/// Bundle of declarations describing one privatized variable in a task-based
/// region.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for task-local privates: only the original decl is recorded;
  /// the other members stay null (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression for the original variable (null for local privates).
  const Expr *OriginalRef = nullptr;
  // The variable being privatized.
  const VarDecl *Original = nullptr;
  // Declaration of the private copy (null for local privates).
  const VarDecl *PrivateCopy = nullptr;
  // Element decl privatized to the shared source during per-element
  // initialization (see emitPrivatesInit); null when no such init is needed.
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry was built with the single-argument constructor,
  /// i.e. it represents a task-local private variable.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
2850 
2851 static bool isAllocatableDecl(const VarDecl *VD) {
2852   const VarDecl *CVD = VD->getCanonicalDecl();
2853   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2854     return false;
2855   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2856   // Use the default allocation.
2857   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2858            !AA->getAllocator());
2859 }
2860 
2861 static RecordDecl *
2862 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2863   if (!Privates.empty()) {
2864     ASTContext &C = CGM.getContext();
2865     // Build struct .kmp_privates_t. {
2866     //         /*  private vars  */
2867     //       };
2868     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2869     RD->startDefinition();
2870     for (const auto &Pair : Privates) {
2871       const VarDecl *VD = Pair.second.Original;
2872       QualType Type = VD->getType().getNonReferenceType();
2873       // If the private variable is a local variable with lvalue ref type,
2874       // allocate the pointer instead of the pointee type.
2875       if (Pair.second.isLocalPrivate()) {
2876         if (VD->getType()->isLValueReferenceType())
2877           Type = C.getPointerType(Type);
2878         if (isAllocatableDecl(VD))
2879           Type = C.getPointerType(Type);
2880       }
2881       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2882       if (VD->hasAttrs()) {
2883         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2884              E(VD->getAttrs().end());
2885              I != E; ++I)
2886           FD->addAttr(*I);
2887       }
2888     }
2889     RD->completeDefinition();
2890     return RD;
2891   }
2892   return nullptr;
2893 }
2894 
2895 static RecordDecl *
2896 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2897                          QualType KmpInt32Ty,
2898                          QualType KmpRoutineEntryPointerQTy) {
2899   ASTContext &C = CGM.getContext();
2900   // Build struct kmp_task_t {
2901   //         void *              shareds;
2902   //         kmp_routine_entry_t routine;
2903   //         kmp_int32           part_id;
2904   //         kmp_cmplrdata_t data1;
2905   //         kmp_cmplrdata_t data2;
2906   // For taskloops additional fields:
2907   //         kmp_uint64          lb;
2908   //         kmp_uint64          ub;
2909   //         kmp_int64           st;
2910   //         kmp_int32           liter;
2911   //         void *              reductions;
2912   //       };
2913   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2914   UD->startDefinition();
2915   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2916   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2917   UD->completeDefinition();
2918   QualType KmpCmplrdataTy = C.getRecordType(UD);
2919   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2920   RD->startDefinition();
2921   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2922   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2923   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2924   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2925   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2926   if (isOpenMPTaskLoopDirective(Kind)) {
2927     QualType KmpUInt64Ty =
2928         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2929     QualType KmpInt64Ty =
2930         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2931     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2932     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2933     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2934     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2935     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2936   }
2937   RD->completeDefinition();
2938   return RD;
2939 }
2940 
2941 static RecordDecl *
2942 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2943                                      ArrayRef<PrivateDataTy> Privates) {
2944   ASTContext &C = CGM.getContext();
2945   // Build struct kmp_task_t_with_privates {
2946   //         kmp_task_t task_data;
2947   //         .kmp_privates_t. privates;
2948   //       };
2949   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2950   RD->startDefinition();
2951   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2952   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2953     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2954   RD->completeDefinition();
2955   return RD;
2956 }
2957 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the tt parameter; Base addresses the embedded kmp_task_t,
  // which is the first field of kmp_task_t_with_privates.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Pass the address (not the value) of tt->task_data.part_id.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->task_data.shareds and cast it to the pointer type the outlined
  // task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates when the record has a privates field, null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions, loaded from
  // the corresponding kmp_task_t fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3072 
3073 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3074                                             SourceLocation Loc,
3075                                             QualType KmpInt32Ty,
3076                                             QualType KmpTaskTWithPrivatesPtrQTy,
3077                                             QualType KmpTaskTWithPrivatesQTy) {
3078   ASTContext &C = CGM.getContext();
3079   FunctionArgList Args;
3080   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3081                             ImplicitParamKind::Other);
3082   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3083                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3084                                 ImplicitParamKind::Other);
3085   Args.push_back(&GtidArg);
3086   Args.push_back(&TaskTypeArg);
3087   const auto &DestructorFnInfo =
3088       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3089   llvm::FunctionType *DestructorFnTy =
3090       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3091   std::string Name =
3092       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3093   auto *DestructorFn =
3094       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3095                              Name, &CGM.getModule());
3096   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3097                                     DestructorFnInfo);
3098   DestructorFn->setDoesNotRecurse();
3099   CodeGenFunction CGF(CGM);
3100   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3101                     Args, Loc, Loc);
3102 
3103   LValue Base = CGF.EmitLoadOfPointerLValue(
3104       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3105       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3106   const auto *KmpTaskTWithPrivatesQTyRD =
3107       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3108   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3109   Base = CGF.EmitLValueForField(Base, *FI);
3110   for (const auto *Field :
3111        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3112     if (QualType::DestructionKind DtorKind =
3113             Field->getType().isDestructedType()) {
3114       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3115       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3116     }
3117   }
3118   CGF.FinishFunction();
3119   return DestructorFn;
3120 }
3121 
3122 /// Emit a privates mapping function for correct handling of private and
3123 /// firstprivate variables.
3124 /// \code
3125 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3126 /// **noalias priv1,...,  <tyn> **noalias privn) {
3127 ///   *priv1 = &.privates.priv1;
3128 ///   ...;
3129 ///   *privn = &.privates.privn;
3130 /// }
3131 /// \endcode
3132 static llvm::Value *
3133 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3134                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3135                                ArrayRef<PrivateDataTy> Privates) {
3136   ASTContext &C = CGM.getContext();
3137   FunctionArgList Args;
3138   ImplicitParamDecl TaskPrivatesArg(
3139       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3140       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3141       ImplicitParamKind::Other);
3142   Args.push_back(&TaskPrivatesArg);
3143   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3144   unsigned Counter = 1;
3145   for (const Expr *E : Data.PrivateVars) {
3146     Args.push_back(ImplicitParamDecl::Create(
3147         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3148         C.getPointerType(C.getPointerType(E->getType()))
3149             .withConst()
3150             .withRestrict(),
3151         ImplicitParamKind::Other));
3152     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3153     PrivateVarsPos[VD] = Counter;
3154     ++Counter;
3155   }
3156   for (const Expr *E : Data.FirstprivateVars) {
3157     Args.push_back(ImplicitParamDecl::Create(
3158         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3159         C.getPointerType(C.getPointerType(E->getType()))
3160             .withConst()
3161             .withRestrict(),
3162         ImplicitParamKind::Other));
3163     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3164     PrivateVarsPos[VD] = Counter;
3165     ++Counter;
3166   }
3167   for (const Expr *E : Data.LastprivateVars) {
3168     Args.push_back(ImplicitParamDecl::Create(
3169         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3170         C.getPointerType(C.getPointerType(E->getType()))
3171             .withConst()
3172             .withRestrict(),
3173         ImplicitParamKind::Other));
3174     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3175     PrivateVarsPos[VD] = Counter;
3176     ++Counter;
3177   }
3178   for (const VarDecl *VD : Data.PrivateLocals) {
3179     QualType Ty = VD->getType().getNonReferenceType();
3180     if (VD->getType()->isLValueReferenceType())
3181       Ty = C.getPointerType(Ty);
3182     if (isAllocatableDecl(VD))
3183       Ty = C.getPointerType(Ty);
3184     Args.push_back(ImplicitParamDecl::Create(
3185         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3186         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3187         ImplicitParamKind::Other));
3188     PrivateVarsPos[VD] = Counter;
3189     ++Counter;
3190   }
3191   const auto &TaskPrivatesMapFnInfo =
3192       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3193   llvm::FunctionType *TaskPrivatesMapTy =
3194       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3195   std::string Name =
3196       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3197   auto *TaskPrivatesMap = llvm::Function::Create(
3198       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3199       &CGM.getModule());
3200   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3201                                     TaskPrivatesMapFnInfo);
3202   if (CGM.getLangOpts().Optimize) {
3203     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3204     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3205     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3206   }
3207   CodeGenFunction CGF(CGM);
3208   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3209                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3210 
3211   // *privi = &.privates.privi;
3212   LValue Base = CGF.EmitLoadOfPointerLValue(
3213       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3214       TaskPrivatesArg.getType()->castAs<PointerType>());
3215   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3216   Counter = 0;
3217   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3218     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3219     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3220     LValue RefLVal =
3221         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3222     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3223         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3224     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3225     ++Counter;
3226   }
3227   CGF.FinishFunction();
3228   return TaskPrivatesMap;
3229 }
3230 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block, used as the
///        copy source for firstprivate-style initialization (may be invalid).
/// \param TDBase Base LValue of the kmp_task_t_with_privates object.
/// \param KmpTaskTWithPrivatesQTyRD Record decl whose second field is the
///        privates record.
/// \param Privates Per-variable helper data, in privates-field order.
/// \param ForDup true when emitting inside the task duplication function
///        (see emitTaskDupFunction), false for the initial task setup.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block with the shareds record type so that
    // captured fields can be looked up below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), only non-trivial constructor-based
    // initializers need to be re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value out of the source task's shareds block,
          // using the original variable's declared alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the source element, then emit
          // the initializer expression into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3349 
3350 /// Check if duplication function is required for taskloops.
3351 static bool checkInitIsRequired(CodeGenFunction &CGF,
3352                                 ArrayRef<PrivateDataTy> Privates) {
3353   bool InitRequired = false;
3354   for (const PrivateDataTy &Pair : Privates) {
3355     if (Pair.second.isLocalPrivate())
3356       continue;
3357     const VarDecl *VD = Pair.second.PrivateCopy;
3358     const Expr *Init = VD->getAnyInitializer();
3359     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3360                                     !CGF.isTrivialInitializer(Init));
3361     if (InitRequired)
3362       break;
3363   }
3364   return InitRequired;
3365 }
3366 
3367 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the implicit argument list: (task_dst, task_src, lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  // The function gets internal linkage: it is only referenced from the
  // taskloop runtime call emitted for this translation unit.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer out of the *source* task so that firstprivate
    // copies in the destination can be initialized from the captured values.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3446 
3447 /// Checks if destructor function is required to be generated.
3448 /// \return true if cleanups are required, false otherwise.
3449 static bool
3450 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3451                          ArrayRef<PrivateDataTy> Privates) {
3452   for (const PrivateDataTy &P : Privates) {
3453     if (P.second.isLocalPrivate())
3454       continue;
3455     QualType Ty = P.second.Original->getType().getNonReferenceType();
3456     if (Ty.isDestructedType())
3457       return true;
3458   }
3459   return false;
3460 }
3461 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// On construction, privatizes the iterator variables and their counters,
/// zero-initializes each counter, and emits the loop headers (condition
/// check + body entry) for every iterator, leaving the builder's insertion
/// point inside the innermost loop body. On destruction, emits — in reverse
/// order — the counter increments, the branches back to the loop headers,
/// and the exit blocks, thereby closing the loop nest.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations: loop-header ("iter.cont") and loop-exit
  // ("iter.exit") blocks, indexed by iterator number.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens the loop nest for iterator expression \p E. A null \p E makes the
  /// whole scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bounds eagerly, before privatization takes effect.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3537 
/// Computes the address and the size (in bytes) of the data referenced by
/// expression \p E. Three forms are handled: OpenMP array-shaping
/// expressions, array sections, and plain lvalue expressions.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // Array shaping: the base is already a pointer value.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Dimensions may have any integer type; widen/narrow to size_t.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Array section: size is the byte distance from the lower bound to one
    // element past the upper bound.
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    // Plain expression: use the size of its static type.
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
3573 
/// Builds the kmp_task_affinity_info_t record type (into
/// \p KmpTaskAffinityInfoTy), if it is not built yet, and builds the 32-bit
/// unsigned flags type used for its last field. (The previous comment
/// mentioning kmp_depend_info was a copy-paste error; that record is built by
/// getDependTypes.)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    // Field order matches RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }
    // used by emitTaskInit when filling the affinity array.
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
3588 
/// Common setup for task-generating directives (task/taskloop/target):
/// collects and sorts private copies, builds the kmp_task_t-with-privates
/// record, emits the proxy task entry, allocates the task object through the
/// runtime, copies captured shareds into it, initializes private copies, and
/// wires up the optional destructor thunk, priority, detach event and
/// affinity registration. Returns the pieces the caller needs to emit the
/// final task/taskloop runtime call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression element so their
  // copies can be initialized from the captured originals.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment so the privates record gets a tight layout.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloops and plain tasks use
  // separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map to the proxy task function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: the element count is the product of the
        // iterators' upper bounds, known only at run time.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized count: emit a VLA of affinity entries.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: emit a constant-sized array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-generated entries need a runtime position counter, seeded
      // past the statically filled entries.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a task_dup routine when lastprivates or non-trivially
    // constructed privates must be re-initialized per generated task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3975 
3976 /// Translates internal dependency kind into the runtime kind.
3977 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3978   RTLDependenceKindTy DepKind;
3979   switch (K) {
3980   case OMPC_DEPEND_in:
3981     DepKind = RTLDependenceKindTy::DepIn;
3982     break;
3983   // Out and InOut dependencies must use the same code.
3984   case OMPC_DEPEND_out:
3985   case OMPC_DEPEND_inout:
3986     DepKind = RTLDependenceKindTy::DepInOut;
3987     break;
3988   case OMPC_DEPEND_mutexinoutset:
3989     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3990     break;
3991   case OMPC_DEPEND_inoutset:
3992     DepKind = RTLDependenceKindTy::DepInOutSet;
3993     break;
3994   case OMPC_DEPEND_outallmemory:
3995     DepKind = RTLDependenceKindTy::DepOmpAllMem;
3996     break;
3997   case OMPC_DEPEND_source:
3998   case OMPC_DEPEND_sink:
3999   case OMPC_DEPEND_depobj:
4000   case OMPC_DEPEND_inoutallmemory:
4001   case OMPC_DEPEND_unknown:
4002     llvm_unreachable("Unknown task dependence type");
4003   }
4004   return DepKind;
4005 }
4006 
4007 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4008 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4009                            QualType &FlagsTy) {
4010   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4011   if (KmpDependInfoTy.isNull()) {
4012     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4013     KmpDependInfoRD->startDefinition();
4014     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4015     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4016     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4017     KmpDependInfoRD->completeDefinition();
4018     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4019   }
4020 }
4021 
/// Returns the number of dependency entries stored in a depobj together with
/// an lvalue for the first entry of its kmp_depend_info array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // The depobj pointer points at the first real dependency entry; the entry
  // immediately before it (index -1) carries the element count in its
  // base_addr field, so step back one element to read it.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4048 
/// Fills entries of \p DependenciesArray for one 'depend' clause (\p Data):
/// for each dependence expression, stores the address, the size and the
/// translated dependence flag into a kmp_depend_info element. \p Pos is
/// either a compile-time index (unsigned *) for clauses without iterators or
/// a runtime counter lvalue (LValue *) for iterator clauses; it is advanced
/// past each emitted entry.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Emit iterator variables (if any) so that the dependence expressions
  // below are evaluated per iteration.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      // omp_all_memory is encoded as a null address with zero length.
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
      // Constant position: index the array directly.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the current counter and index with it.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the position: bump the compile-time index, or emit an
    // increment of the runtime counter.
    if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4119 
/// Computes, for each depobj expression in \p Data, the number of
/// kmp_depend_info elements stored in that depobj, and returns the counts as
/// values that remain usable after the iterator scope has ended.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Stash the count in a temporary so it can be re-loaded below, after
      // the iterator scope is gone. The temporary is zero-initialized and
      // NumDeps is added to it, so it ends up holding exactly NumDeps.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the per-depobj counts outside of the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4157 
/// Copies the kmp_depend_info entries of each depobj in \p Data into
/// \p DependenciesArray, starting at the runtime position held in
/// \p PosLVal, and advances that position past the copied entries.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      // Query the depobj for its element count and the start of its array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4193 
/// Emits the dependence array for a task from its list of 'depend' clauses.
/// Returns the total number of dependence elements (an i32 value, or nullptr
/// when there are no dependencies) and the address of the first element,
/// cast to void*. The array is filled in three passes: regular dependencies
/// first, then regular dependencies with iterators, then the contents of
/// depobj dependencies.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count of dependencies known at compile time: regular deps without
  // iterators. Depobj and iterator deps contribute runtime counts below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj's element count is read from the object at runtime.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // The clause contributes (product of iterator trip counts) *
      // (number of dependence expressions) elements.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a VLA for the array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static size: use a constant-sized local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: regular dependencies without iterators, at compile-time-known
  // positions.
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4317 
/// Emits the dependence array for an 'omp depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading element whose
/// base_addr field records the number of dependencies; the returned address
/// points past that extra element, at the first real kmp_depend_info entry.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With iterators the element count is the product of the iterator trip
    // counts, computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant element count: the byte size is a compile-time constant.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.Builder.getPtrTy(0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the array starting at index 1 (index 0 holds the element count):
  // use a compile-time index unless iterators force a runtime counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real element, past the size slot.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4404 
4405 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4406                                         SourceLocation Loc) {
4407   ASTContext &C = CGM.getContext();
4408   QualType FlagsTy;
4409   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4410   LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4411                                             C.VoidPtrTy.castAs<PointerType>());
4412   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4413   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4414       Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4415       CGF.ConvertTypeForMem(KmpDependInfoTy));
4416   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4417       Addr.getElementType(), Addr.emitRawPointer(CGF),
4418       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4419   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4420                                                                CGF.VoidPtrTy);
4421   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4422   // Use default allocator.
4423   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4424   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4425 
4426   // _kmpc_free(gtid, addr, nullptr);
4427   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4428                                 CGM.getModule(), OMPRTL___kmpc_free),
4429                             Args);
4430 }
4431 
/// Emits the code for 'omp depobj ... update(<kind>)': rewrites the flags
/// field of every kmp_depend_info element in the depobj array referenced by
/// \p DepobjLVal with the RTL encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body executes once before the exit test, so this
  // presumably relies on the depobj holding at least one element — confirm.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: the array start on the first
  // iteration, the advanced pointer afterwards.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4480 
/// Emits the code for a task construct: initializes the task object, emits
/// the dependence array (if any), and then either enqueues the task with the
/// runtime (__kmpc_omp_task[_with_deps]) or, when the 'if' clause evaluates
/// to false, runs it immediately between __kmpc_omp_task_begin_if0 /
/// __kmpc_omp_task_complete_if0, waiting on dependences first.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): hand the task to the runtime for deferred
  // execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // 'if' clause false: execute the task body immediately and serially.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4600 
/// Emits a call to __kmpc_taskloop (or __kmpc_taskloop_5 when a
/// grainsize/num_tasks modifier is present) for a taskloop directive:
/// initializes the task object, stores the lower bound, upper bound, stride
/// and reductions address into it, and forwards the scheduling information
/// from \p Data.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // No 'if' clause: behaves as if the condition is true.
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb, ub and st fields of the task object from the
  // initializers of the corresponding loop-bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kind encoding expected by the runtime's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
  if (Data.HasModifier)
    TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));

  TaskArgs.push_back(Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               Result.TaskDupFn, CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), Data.HasModifier
                                               ? OMPRTL___kmpc_taskloop_5
                                               : OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
4689 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen; they are not interpreted here.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  // emitArrayLength also sets ElementTy and adjusts LHSAddr to point at the
  // first element.
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element so RedOpGen's
  // expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edge incoming values must come from the block the branch was
  // emitted in, which RedOpGen may have changed.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4772 
4773 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4774 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4775 /// UDR combiner function.
4776 static void emitReductionCombiner(CodeGenFunction &CGF,
4777                                   const Expr *ReductionOp) {
4778   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4779     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4780       if (const auto *DRE =
4781               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4782         if (const auto *DRD =
4783                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4784           std::pair<llvm::Function *, llvm::Function *> Reduction =
4785               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4786           RValue Func = RValue::get(Reduction.first);
4787           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4788           CGF.EmitIgnoredExpr(ReductionOp);
4789           return;
4790         }
4791   CGF.EmitIgnoredExpr(ReductionOp);
4792 }
4793 
/// Emits the internal `void reduction_func(void *LHSArg, void *RHSArg)` used
/// by __kmpc_reduce: both arguments are arrays of void* element pointers and
/// each LHS element is combined in place with the matching RHS element via
/// the corresponding entry of \p ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable to its slot in the argument arrays. Idx runs
  // ahead of I because VLA entries consume an extra slot for their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The VLA size expression is an opaque value; bind it to the size
      // stashed (as a pointer) in the extra array slot.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for each reduction item; array-typed items reduce
  // element by element.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
4883 
4884 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4885                                                   const Expr *ReductionOp,
4886                                                   const Expr *PrivateRef,
4887                                                   const DeclRefExpr *LHS,
4888                                                   const DeclRefExpr *RHS) {
4889   if (PrivateRef->getType()->isArrayType()) {
4890     // Emit reduction for array section.
4891     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4892     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4893     EmitOMPAggregateReduction(
4894         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4895         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4896           emitReductionCombiner(CGF, ReductionOp);
4897         });
4898   } else {
4899     // Emit reduction for array subscript or single variable.
4900     emitReductionCombiner(CGF, ReductionOp);
4901   }
4902 }
4903 
4904 static std::string generateUniqueName(CodeGenModule &CGM,
4905                                       llvm::StringRef Prefix, const Expr *Ref);
4906 
/// Emits a reduction over a privatized variable using an internal shared
/// global as the accumulator, with __kmpc_barrier calls separating the
/// init / combine / broadcast phases (see the step-by-step comment below).
/// \param Privates Reference to the private copy being reduced.
/// \param LHSExprs / RHSExprs DeclRefExprs for the combiner operands.
/// \param ReductionOps The combiner expression (built-in op or UDR call).
void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {

  //  Create a shared global variable (__shared_reduction_var) to accumulate the
  //  final result.
  //
  //  Call __kmpc_barrier to synchronize threads before initialization.
  //
  //  The master thread (thread_id == 0) initializes __shared_reduction_var
  //    with the identity value or initializer.
  //
  //  Call __kmpc_barrier to synchronize before combining.
  //  For each i:
  //    - Thread enters critical section.
  //    - Reads its private value from LHSExprs[i].
  //    - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  //    Privates[i]).
  //    - Exits critical section.
  //
  //  Call __kmpc_barrier after combining.
  //
  //  Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  //  Final __kmpc_barrier to synchronize after broadcasting
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);

  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
  // Derive a unique name for the shared accumulator from the reduced
  // variable's name when it is a plain DeclRefExpr.
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable
  // NOTE(review): "internal_pivate_" looks like a typo for "internal_private_",
  // but the string only needs to be unique; preserved byte-for-byte.
  std::string SharedName =
      CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      LLVMType, ".omp.reduction." + SharedName);

  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  // Only thread 0 runs the initialization block; everyone else jumps
  // straight to init.end and waits at the barrier below.
  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");

  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);

  CGF.EmitBlock(InitBB);

  // Initialize the shared accumulator: UDR initializer if present, else the
  // variable's own initializer, else null/zero initialization.
  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
            // Initializer of the form init_fn(&omp_priv, ...): redirect the
            // omp_priv argument to the shared accumulator.
            const auto *OutDRE = cast<DeclRefExpr>(
                cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(OutVD, SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    CE->getCallee()->IgnoreParenImpCasts())) {
              // Bind the opaque callee to the UDR initializer function.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(InitializerFn));
              CGF.EmitIgnoredExpr(CE);
            } else {
              CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                                   PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            // Non-call initializer expression: evaluate it directly into the
            // shared accumulator.
            CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                                 PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          // No emitted initializer function; fall back to evaluating the
          // initializer expression.
          CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                               PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(SharedResult, PrivateType);
      }
      return; // UDR initialization handled
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(InitExpr, SharedResult,
                               PrivateType.getQualifiers(), true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(SharedResult, PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(InitEndBB);
  CGF.EmitBlock(InitEndBB);

  // Barrier: combining must not start before thread 0 finished initializing.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
  LValue LHSLV = CGF.EmitLValue(Privates);

  // Helper to wrap a combine step in a named critical region.
  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName({"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(CurrentUDR);
      if (FnPair.first) {
        if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
          // Redirect omp_out to the shared accumulator and omp_in to the
          // thread's private value before emitting the combiner call.
          const auto *OutDRE = cast<DeclRefExpr>(
              cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
                                SharedLV.getAddress());
          LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
                                LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
#ifndef NDEBUG
    // Sanity-check that ReductionOp has the expected assignment shape
    // (x = x op e, possibly via operator=); debug builds only.
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
      // Remap the combiner's operands: out -> shared accumulator,
      // in -> this thread's private copy.
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
      LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation
      CGF.EmitIgnoredExpr(ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }

  // Barrier: all threads must have combined before anyone reads the result.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  // Broadcast final result
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(TargetLHSLV,
                          CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
                          PrivateType, AggValueSlot::DoesNotOverlap, false);
  } else {
    CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
  }
  // Final synchronization barrier
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  // Combiner with original list item
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOps, Privates,
                                cast<DeclRefExpr>(LHSExprs),
                                cast<DeclRefExpr>(RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}
5147 
5148 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5149                                     ArrayRef<const Expr *> OrgPrivates,
5150                                     ArrayRef<const Expr *> OrgLHSExprs,
5151                                     ArrayRef<const Expr *> OrgRHSExprs,
5152                                     ArrayRef<const Expr *> OrgReductionOps,
5153                                     ReductionOptionsTy Options) {
5154   if (!CGF.HaveInsertPoint())
5155     return;
5156 
5157   bool WithNowait = Options.WithNowait;
5158   bool SimpleReduction = Options.SimpleReduction;
5159 
5160   // Next code should be emitted for reduction:
5161   //
5162   // static kmp_critical_name lock = { 0 };
5163   //
5164   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5165   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5166   //  ...
5167   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5168   //  *(Type<n>-1*)rhs[<n>-1]);
5169   // }
5170   //
5171   // ...
5172   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5173   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5174   // RedList, reduce_func, &<lock>)) {
5175   // case 1:
5176   //  ...
5177   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5178   //  ...
5179   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5180   // break;
5181   // case 2:
5182   //  ...
5183   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5184   //  ...
5185   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5186   // break;
5187   // default:;
5188   // }
5189   //
5190   // if SimpleReduction is true, only the next code is generated:
5191   //  ...
5192   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5193   //  ...
5194 
5195   ASTContext &C = CGM.getContext();
5196 
5197   if (SimpleReduction) {
5198     CodeGenFunction::RunCleanupsScope Scope(CGF);
5199     const auto *IPriv = OrgPrivates.begin();
5200     const auto *ILHS = OrgLHSExprs.begin();
5201     const auto *IRHS = OrgRHSExprs.begin();
5202     for (const Expr *E : OrgReductionOps) {
5203       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5204                                   cast<DeclRefExpr>(*IRHS));
5205       ++IPriv;
5206       ++ILHS;
5207       ++IRHS;
5208     }
5209     return;
5210   }
5211 
5212   // Filter out shared  reduction variables based on IsPrivateVarReduction flag.
5213   // Only keep entries where the corresponding variable is not private.
5214   SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5215       FilteredRHSExprs, FilteredReductionOps;
5216   for (unsigned I : llvm::seq<unsigned>(
5217            std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5218     if (!Options.IsPrivateVarReduction[I]) {
5219       FilteredPrivates.emplace_back(OrgPrivates[I]);
5220       FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5221       FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5222       FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5223     }
5224   }
5225   // Wrap filtered vectors in ArrayRef for downstream shared reduction
5226   // processing.
5227   ArrayRef<const Expr *> Privates = FilteredPrivates;
5228   ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5229   ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5230   ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5231 
5232   // 1. Build a list of reduction variables.
5233   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5234   auto Size = RHSExprs.size();
5235   for (const Expr *E : Privates) {
5236     if (E->getType()->isVariablyModifiedType())
5237       // Reserve place for array size.
5238       ++Size;
5239   }
5240   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5241   QualType ReductionArrayTy = C.getConstantArrayType(
5242       C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5243       /*IndexTypeQuals=*/0);
5244   RawAddress ReductionList =
5245       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5246   const auto *IPriv = Privates.begin();
5247   unsigned Idx = 0;
5248   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5249     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5250     CGF.Builder.CreateStore(
5251         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5252             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5253         Elem);
5254     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5255       // Store array size.
5256       ++Idx;
5257       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5258       llvm::Value *Size = CGF.Builder.CreateIntCast(
5259           CGF.getVLASize(
5260                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5261               .NumElts,
5262           CGF.SizeTy, /*isSigned=*/false);
5263       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5264                               Elem);
5265     }
5266   }
5267 
5268   // 2. Emit reduce_func().
5269   llvm::Function *ReductionFn = emitReductionFunction(
5270       CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5271       Privates, LHSExprs, RHSExprs, ReductionOps);
5272 
5273   // 3. Create static kmp_critical_name lock = { 0 };
5274   std::string Name = getName({"reduction"});
5275   llvm::Value *Lock = getCriticalRegionLock(Name);
5276 
5277   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5278   // RedList, reduce_func, &<lock>);
5279   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5280   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5281   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5282   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5283       ReductionList.getPointer(), CGF.VoidPtrTy);
5284   llvm::Value *Args[] = {
5285       IdentTLoc,                             // ident_t *<loc>
5286       ThreadId,                              // i32 <gtid>
5287       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5288       ReductionArrayTySize,                  // size_type sizeof(RedList)
5289       RL,                                    // void *RedList
5290       ReductionFn, // void (*) (void *, void *) <reduce_func>
5291       Lock         // kmp_critical_name *&<lock>
5292   };
5293   llvm::Value *Res = CGF.EmitRuntimeCall(
5294       OMPBuilder.getOrCreateRuntimeFunction(
5295           CGM.getModule(),
5296           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5297       Args);
5298 
5299   // 5. Build switch(res)
5300   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5301   llvm::SwitchInst *SwInst =
5302       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5303 
5304   // 6. Build case 1:
5305   //  ...
5306   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5307   //  ...
5308   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5309   // break;
5310   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5311   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5312   CGF.EmitBlock(Case1BB);
5313 
5314   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5315   llvm::Value *EndArgs[] = {
5316       IdentTLoc, // ident_t *<loc>
5317       ThreadId,  // i32 <gtid>
5318       Lock       // kmp_critical_name *&<lock>
5319   };
5320   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5321                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5322     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5323     const auto *IPriv = Privates.begin();
5324     const auto *ILHS = LHSExprs.begin();
5325     const auto *IRHS = RHSExprs.begin();
5326     for (const Expr *E : ReductionOps) {
5327       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5328                                      cast<DeclRefExpr>(*IRHS));
5329       ++IPriv;
5330       ++ILHS;
5331       ++IRHS;
5332     }
5333   };
5334   RegionCodeGenTy RCG(CodeGen);
5335   CommonActionTy Action(
5336       nullptr, {},
5337       OMPBuilder.getOrCreateRuntimeFunction(
5338           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5339                                       : OMPRTL___kmpc_end_reduce),
5340       EndArgs);
5341   RCG.setAction(Action);
5342   RCG(CGF);
5343 
5344   CGF.EmitBranch(DefaultBB);
5345 
5346   // 7. Build case 2:
5347   //  ...
5348   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5349   //  ...
5350   // break;
5351   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5352   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5353   CGF.EmitBlock(Case2BB);
5354 
5355   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5356                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5357     const auto *ILHS = LHSExprs.begin();
5358     const auto *IRHS = RHSExprs.begin();
5359     const auto *IPriv = Privates.begin();
5360     for (const Expr *E : ReductionOps) {
5361       const Expr *XExpr = nullptr;
5362       const Expr *EExpr = nullptr;
5363       const Expr *UpExpr = nullptr;
5364       BinaryOperatorKind BO = BO_Comma;
5365       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5366         if (BO->getOpcode() == BO_Assign) {
5367           XExpr = BO->getLHS();
5368           UpExpr = BO->getRHS();
5369         }
5370       }
5371       // Try to emit update expression as a simple atomic.
5372       const Expr *RHSExpr = UpExpr;
5373       if (RHSExpr) {
5374         // Analyze RHS part of the whole expression.
5375         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5376                 RHSExpr->IgnoreParenImpCasts())) {
5377           // If this is a conditional operator, analyze its condition for
5378           // min/max reduction operator.
5379           RHSExpr = ACO->getCond();
5380         }
5381         if (const auto *BORHS =
5382                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5383           EExpr = BORHS->getRHS();
5384           BO = BORHS->getOpcode();
5385         }
5386       }
5387       if (XExpr) {
5388         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5389         auto &&AtomicRedGen = [BO, VD,
5390                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5391                                     const Expr *EExpr, const Expr *UpExpr) {
5392           LValue X = CGF.EmitLValue(XExpr);
5393           RValue E;
5394           if (EExpr)
5395             E = CGF.EmitAnyExpr(EExpr);
5396           CGF.EmitOMPAtomicSimpleUpdateExpr(
5397               X, E, BO, /*IsXLHSInRHSPart=*/true,
5398               llvm::AtomicOrdering::Monotonic, Loc,
5399               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5400                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5401                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5402                 CGF.emitOMPSimpleStore(
5403                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5404                     VD->getType().getNonReferenceType(), Loc);
5405                 PrivateScope.addPrivate(VD, LHSTemp);
5406                 (void)PrivateScope.Privatize();
5407                 return CGF.EmitAnyExpr(UpExpr);
5408               });
5409         };
5410         if ((*IPriv)->getType()->isArrayType()) {
5411           // Emit atomic reduction for array section.
5412           const auto *RHSVar =
5413               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5414           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5415                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5416         } else {
5417           // Emit atomic reduction for array subscript or single variable.
5418           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5419         }
5420       } else {
5421         // Emit as a critical region.
5422         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5423                                      const Expr *, const Expr *) {
5424           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5425           std::string Name = RT.getName({"atomic_reduction"});
5426           RT.emitCriticalRegion(
5427               CGF, Name,
5428               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5429                 Action.Enter(CGF);
5430                 emitReductionCombiner(CGF, E);
5431               },
5432               Loc);
5433         };
5434         if ((*IPriv)->getType()->isArrayType()) {
5435           const auto *LHSVar =
5436               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5437           const auto *RHSVar =
5438               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5439           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5440                                     CritRedGen);
5441         } else {
5442           CritRedGen(CGF, nullptr, nullptr, nullptr);
5443         }
5444       }
5445       ++ILHS;
5446       ++IRHS;
5447       ++IPriv;
5448     }
5449   };
5450   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5451   if (!WithNowait) {
5452     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5453     llvm::Value *EndArgs[] = {
5454         IdentTLoc, // ident_t *<loc>
5455         ThreadId,  // i32 <gtid>
5456         Lock       // kmp_critical_name *&<lock>
5457     };
5458     CommonActionTy Action(nullptr, {},
5459                           OMPBuilder.getOrCreateRuntimeFunction(
5460                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5461                           EndArgs);
5462     AtomicRCG.setAction(Action);
5463     AtomicRCG(CGF);
5464   } else {
5465     AtomicRCG(CGF);
5466   }
5467 
5468   CGF.EmitBranch(DefaultBB);
5469   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5470   assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5471          "PrivateVarReduction: Privates size mismatch");
5472   assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5473          "PrivateVarReduction: ReductionOps size mismatch");
5474   for (unsigned I : llvm::seq<unsigned>(
5475            std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5476     if (Options.IsPrivateVarReduction[I])
5477       emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5478                            OrgRHSExprs[I], OrgReductionOps[I]);
5479   }
5480 }
5481 
5482 /// Generates unique name for artificial threadprivate variables.
5483 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5484 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5485                                       const Expr *Ref) {
5486   SmallString<256> Buffer;
5487   llvm::raw_svector_ostream Out(Buffer);
5488   const clang::DeclRefExpr *DE;
5489   const VarDecl *D = ::getBaseDecl(Ref, DE);
5490   if (!D)
5491     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5492   D = D->getCanonicalDecl();
5493   std::string Name = CGM.getOpenMPRuntime().getName(
5494       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5495   Out << Prefix << Name << "_"
5496       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5497   return std::string(Out.str());
5498 }
5499 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this helper is emitted for.
/// \return The newly created internal function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are void* restrict: the private and original items
  // never alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Create an internal, non-recursive helper function in the current module.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5564 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param ReductionOp Combiner expression from the reduction clause.
/// \param LHS DeclRefExpr for the in/out operand; remapped onto %arg0.
/// \param RHS DeclRefExpr for the in operand; remapped onto %arg1.
/// \param PrivateRef Private copy expression, forwarded to the combiner
///        emission.
/// \return The newly created internal function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Create an internal, non-recursive helper function in the current module.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(CGF.Builder.getPtrTy(0)),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.Builder.getPtrTy(0)),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5640 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \return The newly created internal function, or nullptr if the reduction
///         item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed for items without cleanups; the caller stores a
  // null pointer in the descriptor instead.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Create an internal, non-recursive helper function in the current module.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5688 
/// Emits initialization code for task reductions: builds an on-stack array of
/// kmp_taskred_input_t descriptors (shared/orig pointers, size, and the
/// init/fini/comb helper functions for each reduction item) and registers it
/// with the runtime via __kmpc_taskred_modifier_init (when a reduction with
/// task modifier is used) or __kmpc_taskred_init. Returns the value returned
/// by the runtime call, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer means the item needs no cleanup.
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // A nonzero flag requests delayed creation for items whose size is only
    // known at runtime (see DelayedCreation above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
5813 
5814 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5815                                             SourceLocation Loc,
5816                                             bool IsWorksharingReduction) {
5817   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5818   // is_ws, int num, void *data);
5819   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5820   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5821                                                 CGM.IntTy, /*isSigned=*/true);
5822   llvm::Value *Args[] = {IdentTLoc, GTid,
5823                          llvm::ConstantInt::get(CGM.IntTy,
5824                                                 IsWorksharingReduction ? 1 : 0,
5825                                                 /*isSigned=*/true)};
5826   (void)CGF.EmitRuntimeCall(
5827       OMPBuilder.getOrCreateRuntimeFunction(
5828           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5829       Args);
5830 }
5831 
5832 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5833                                               SourceLocation Loc,
5834                                               ReductionCodeGen &RCG,
5835                                               unsigned N) {
5836   auto Sizes = RCG.getSizes(N);
5837   // Emit threadprivate global variable if the type is non-constant
5838   // (Sizes.second = nullptr).
5839   if (Sizes.second) {
5840     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5841                                                      /*isSigned=*/false);
5842     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5843         CGF, CGM.getContext().getSizeType(),
5844         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5845     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5846   }
5847 }
5848 
5849 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5850                                               SourceLocation Loc,
5851                                               llvm::Value *ReductionsPtr,
5852                                               LValue SharedLVal) {
5853   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5854   // *d);
5855   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5856                                                    CGM.IntTy,
5857                                                    /*isSigned=*/true),
5858                          ReductionsPtr,
5859                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5860                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5861   return Address(
5862       CGF.EmitRuntimeCall(
5863           OMPBuilder.getOrCreateRuntimeFunction(
5864               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5865           Args),
5866       CGF.Int8Ty, SharedLVal.getAlignment());
5867 }
5868 
/// Emits a taskwait construct: via the OpenMPIRBuilder when enabled and there
/// are no dependences, otherwise by calling __kmpc_omp_taskwait_deps_51 (with
/// dependences) or __kmpc_omp_taskwait directly.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause items (if any) into an array.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      // No noalias dependence list is emitted here.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an untied-task region, record a resume point after the taskwait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
5921 
5922 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5923                                            OpenMPDirectiveKind InnerKind,
5924                                            const RegionCodeGenTy &CodeGen,
5925                                            bool HasCancel) {
5926   if (!CGF.HaveInsertPoint())
5927     return;
5928   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5929                                  InnerKind != OMPD_critical &&
5930                                      InnerKind != OMPD_master &&
5931                                      InnerKind != OMPD_masked);
5932   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5933 }
5934 
namespace {
/// Cancellation-kind codes passed as the cncl_kind argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
/// NOTE(review): these values presumably mirror the runtime's kmp_int32
/// cancel-kind constants — confirm against the KMP runtime headers before
/// changing any value.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
5944 
5945 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5946   RTCancelKind CancelKind = CancelNoreq;
5947   if (CancelRegion == OMPD_parallel)
5948     CancelKind = CancelParallel;
5949   else if (CancelRegion == OMPD_for)
5950     CancelKind = CancelLoop;
5951   else if (CancelRegion == OMPD_sections)
5952     CancelKind = CancelSections;
5953   else {
5954     assert(CancelRegion == OMPD_taskgroup);
5955     CancelKind = CancelTaskgroup;
5956   }
5957   return CancelKind;
5958 }
5959 
/// Emits a cancellation point: calls __kmpc_cancellationpoint and, when the
/// runtime reports a pending cancellation, branches out of the construct
/// through the cleanup chain (emitting a cancel barrier first for parallel
/// regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Nonzero result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
5999 
6000 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6001                                      const Expr *IfCond,
6002                                      OpenMPDirectiveKind CancelRegion) {
6003   if (!CGF.HaveInsertPoint())
6004     return;
6005   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6006   // kmp_int32 cncl_kind);
6007   auto &M = CGM.getModule();
6008   if (auto *OMPRegionInfo =
6009           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6010     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6011                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6012       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6013       llvm::Value *Args[] = {
6014           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6015           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6016       // Ignore return result until untied tasks are supported.
6017       llvm::Value *Result = CGF.EmitRuntimeCall(
6018           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6019       // if (__kmpc_cancel()) {
6020       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6021       //   exit from construct;
6022       // }
6023       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6024       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6025       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6026       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6027       CGF.EmitBlock(ExitBB);
6028       if (CancelRegion == OMPD_parallel)
6029         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6030       // exit from construct;
6031       CodeGenFunction::JumpDest CancelDest =
6032           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6033       CGF.EmitBranchThroughCleanup(CancelDest);
6034       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6035     };
6036     if (IfCond) {
6037       emitIfClause(CGF, IfCond, ThenGen,
6038                    [](CodeGenFunction &, PrePostActionTy &) {});
6039     } else {
6040       RegionCodeGenTy ThenRCG(ThenGen);
6041       ThenRCG(CGF);
6042     }
6043   }
6044 }
6045 
6046 namespace {
6047 /// Cleanup action for uses_allocators support.
6048 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6049   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6050 
6051 public:
6052   OMPUsesAllocatorsActionTy(
6053       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6054       : Allocators(Allocators) {}
6055   void Enter(CodeGenFunction &CGF) override {
6056     if (!CGF.HaveInsertPoint())
6057       return;
6058     for (const auto &AllocatorData : Allocators) {
6059       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6060           CGF, AllocatorData.first, AllocatorData.second);
6061     }
6062   }
6063   void Exit(CodeGenFunction &CGF) override {
6064     if (!CGF.HaveInsertPoint())
6065       return;
6066     for (const auto &AllocatorData : Allocators) {
6067       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6068                                                         AllocatorData.first);
6069     }
6070   }
6071 };
6072 } // namespace
6073 
6074 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6075     const OMPExecutableDirective &D, StringRef ParentName,
6076     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6077     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6078   assert(!ParentName.empty() && "Invalid target entry parent name!");
6079   HasEmittedTargetRegion = true;
6080   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6081   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6082     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6083       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6084       if (!D.AllocatorTraits)
6085         continue;
6086       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6087     }
6088   }
6089   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6090   CodeGen.setAction(UsesAllocatorAction);
6091   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6092                                    IsOffloadEntry, CodeGen);
6093 }
6094 
/// Emit a call to __kmpc_init_allocator for one uses_allocators entry and
/// store the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The traits expression is a constant-size array; its element count is the
  // runtime call's ntraits argument.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as a generic void* pointer for the
  // runtime interface (including any address-space adjustment).
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable must be materialized before we can form an lvalue
  // for it; emit its alloca first.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* handle to the declared allocator type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6128 
6129 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6130                                              const Expr *Allocator) {
6131   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6132   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6133   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6134   llvm::Value *AllocatorVal =
6135       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6136   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6137                                           CGF.getContext().VoidPtrTy,
6138                                           Allocator->getExprLoc());
6139   (void)CGF.EmitRuntimeCall(
6140       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6141                                             OMPRTL___kmpc_destroy_allocator),
6142       {ThreadId, AllocatorVal});
6143 }
6144 
6145 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6146     const OMPExecutableDirective &D, CodeGenFunction &CGF,
6147     llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6148   assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6149          "invalid default attrs structure");
6150   int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6151   int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6152 
6153   getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6154   getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6155                                       /*UpperBoundOnly=*/true);
6156 
6157   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6158     for (auto *A : C->getAttrs()) {
6159       int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6160       int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6161       if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6162         CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6163                                        &AttrMinBlocksVal, &AttrMaxBlocksVal);
6164       else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6165         CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6166             nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6167             &AttrMaxThreadsVal);
6168       else
6169         continue;
6170 
6171       Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6172       if (AttrMaxThreadsVal > 0)
6173         MaxThreadsVal = MaxThreadsVal > 0
6174                             ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6175                             : AttrMaxThreadsVal;
6176       Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6177       if (AttrMaxBlocksVal > 0)
6178         MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6179                                       : AttrMaxBlocksVal;
6180     }
6181   }
6182 }
6183 
6184 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6185     const OMPExecutableDirective &D, StringRef ParentName,
6186     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6187     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6188 
6189   llvm::TargetRegionEntryInfo EntryInfo =
6190       getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6191 
6192   CodeGenFunction CGF(CGM, true);
6193   llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6194       [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6195         const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6196 
6197         CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6198         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6199         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6200       };
6201 
6202   cantFail(OMPBuilder.emitTargetRegionFunction(
6203       EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6204       OutlinedFnID));
6205 
6206   if (!OutlinedFn)
6207     return;
6208 
6209   CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6210 
6211   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6212     for (auto *A : C->getAttrs()) {
6213       if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6214         CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6215     }
6216   }
6217 }
6218 
6219 /// Checks if the expression is constant or does not have non-trivial function
6220 /// calls.
6221 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6222   // We can skip constant expressions.
6223   // We can skip expressions with trivial calls or simple expressions.
6224   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6225           !E->hasNonTrivialCall(Ctx)) &&
6226          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6227 }
6228 
6229 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6230                                                     const Stmt *Body) {
6231   const Stmt *Child = Body->IgnoreContainers();
6232   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6233     Child = nullptr;
6234     for (const Stmt *S : C->body()) {
6235       if (const auto *E = dyn_cast<Expr>(S)) {
6236         if (isTrivial(Ctx, E))
6237           continue;
6238       }
6239       // Some of the statements can be ignored.
6240       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6241           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6242         continue;
6243       // Analyze declarations.
6244       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6245         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6246               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6247                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6248                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6249                   isa<UsingDirectiveDecl>(D) ||
6250                   isa<OMPDeclareReductionDecl>(D) ||
6251                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6252                 return true;
6253               const auto *VD = dyn_cast<VarDecl>(D);
6254               if (!VD)
6255                 return false;
6256               return VD->hasGlobalStorage() || !VD->isUsed();
6257             }))
6258           continue;
6259       }
6260       // Found multiple children - cannot get the one child only.
6261       if (Child)
6262         return nullptr;
6263       Child = S;
6264     }
6265     if (Child)
6266       Child = Child->IgnoreContainers();
6267   }
6268   return Child;
6269 }
6270 
/// Determine the number-of-teams expression and constant bounds for a
/// target-based executable directive.
///
/// \param MinTeamsVal [out] Constant lower bound on the number of teams;
///        set to -1 for a plain 'target' with no nested directive (caller
///        uses this to detect that no teams region is needed).
/// \param MaxTeamsVal [out] Constant upper bound; 0 means the value is only
///        known at runtime.
/// \returns The num_teams clause expression when one is present (for the
///          caller to emit), or nullptr when the bounds are fully constant.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look at the single nested statement: a nested
    // teams directive (and its num_teams clause, if any) decides the bounds.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Fold a constant num_teams into both bounds; hand the expression
          // back either way so the caller can emit it.
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: the runtime chooses.
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      // Nested non-teams directive: executes with a single team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  // Combined target+teams forms: the num_teams clause (if any) sits on this
  // directive itself.
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    // No num_teams clause: the runtime chooses.
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  // target+parallel/simd forms run with exactly one team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  // All remaining directive kinds are not target execution directives and
  // are rejected by the assertion above; they fall through to unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6398 
6399 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6400     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6401   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6402          "Clauses associated with the teams directive expected to be emitted "
6403          "only for the host!");
6404   CGBuilderTy &Bld = CGF.Builder;
6405   int32_t MinNT = -1, MaxNT = -1;
6406   const Expr *NumTeams =
6407       getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6408   if (NumTeams != nullptr) {
6409     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6410 
6411     switch (DirectiveKind) {
6412     case OMPD_target: {
6413       const auto *CS = D.getInnermostCapturedStmt();
6414       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6415       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6416       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6417                                                   /*IgnoreResultAssign*/ true);
6418       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6419                              /*isSigned=*/true);
6420     }
6421     case OMPD_target_teams:
6422     case OMPD_target_teams_distribute:
6423     case OMPD_target_teams_distribute_simd:
6424     case OMPD_target_teams_distribute_parallel_for:
6425     case OMPD_target_teams_distribute_parallel_for_simd: {
6426       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6427       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6428                                                   /*IgnoreResultAssign*/ true);
6429       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6430                              /*isSigned=*/true);
6431     }
6432     default:
6433       break;
6434     }
6435   }
6436 
6437   assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6438   return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6439 }
6440 
/// Check for a num threads constant value (folded into \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed. \p UpperBound uses -1 as
/// the "not yet constrained" sentinel and 0 for "runtime-chosen".
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  // Find the single nested directive inside the captured statement; if there
  // is none, this region places no constraint on the thread count.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      // Use the if-clause that applies to 'parallel' (or has no modifier).
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition serializes the region: one thread.
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          // Non-constant condition: emit it (after any pre-init declarations
          // the clause captured) so the caller can select at runtime.
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                // Capture-no-init: allocate storage and register cleanups
                // without running an initializer.
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      // NOTE(review): the ternary arms below look swapped — when UpperBound
      // is already set (non-zero, including the -1 sentinel) the constant
      // overwrites it instead of being min'ed in, and std::min against an
      // UpperBound of 0 always yields 0. Confirm intended combining rule
      // before relying on the merged bound.
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found a upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      // Emit pre-init declarations so the caller can later evaluate NTExpr
      // in a valid scope.
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  // A nested simd region executes with a single thread.
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
6535 
/// Determine the number-of-threads expression and constant upper bound for a
/// target-based executable directive, considering num_threads,
/// thread_limit and if clauses on the directive and on directives nested
/// within it.
///
/// \param UpperBound [in,out] Constant upper bound; -1 means "unconstrained
///        so far", 0 means "runtime-chosen".
/// \param UpperBoundOnly When true, only the constant bound is computed and
///        no expressions are emitted or returned.
/// \param CondVal [out, optional] Receives the emitted if-clause condition
///        when it is not a constant.
/// \param ThreadLimitExpr [out, optional] Receives the thread_limit clause
///        expression, when present.
/// \returns The num_threads expression for the caller to emit, or nullptr.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  // Fold a constant clause value into UpperBound and optionally hand the
  // expression back through EPtr.
  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      // NOTE(review): the ternary arms look swapped — with UpperBound already
      // set (non-zero) the constant overwrites it rather than being min'ed,
      // and std::min against 0 is always 0. Confirm the intended combining
      // rule against upstream before relying on the merged bound.
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found a upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  // Force a single-threaded (sequential) bound.
  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          // Emit the nested clause's pre-init declarations so its expression
          // can be evaluated by the caller in a valid scope.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    // Descend through a nested teams (non-distribute) region, then through a
    // nested parallel or simd region, to find their thread constraints.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // A nested 'distribute' may itself contain a parallel region; recurse
    // into it for additional constraints.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  // Combined forms containing a parallel region: consult the if, thread_limit
  // and num_threads clauses on this directive itself.
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Use the if-clause that applies to 'parallel' (or has no modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition serializes the region.
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  // simd-only forms execute with a single thread.
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6700 
// Emit the runtime value for the number of threads of a target directive.
// Combines the directive's num_threads / thread_limit / if clauses (as
// pre-digested by getNumThreadsExprForTargetDirective) into a single i32:
//   <cond> ? (<numthreads> ? <numthreads> : 0) : 1, clamped by thread_limit.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression above.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
6757 
6758 namespace {
6759 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6760 
6761 // Utility to handle information from clauses associated with a given
6762 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6763 // It provides a convenient interface to obtain the information and generate
6764 // code for that information.
6765 class MappableExprsHandler {
6766 public:
6767   /// Get the offset of the OMP_MAP_MEMBER_OF field.
6768   static unsigned getFlagMemberOffset() {
6769     unsigned Offset = 0;
6770     for (uint64_t Remain =
6771              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6772                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6773          !(Remain & 1); Remain = Remain >> 1)
6774       Offset++;
6775     return Offset;
6776   }
6777 
6778   /// Class that holds debugging information for a data mapping to be passed to
6779   /// the runtime library.
6780   class MappingExprInfo {
6781     /// The variable declaration used for the data mapping.
6782     const ValueDecl *MapDecl = nullptr;
6783     /// The original expression used in the map clause, or null if there is
6784     /// none.
6785     const Expr *MapExpr = nullptr;
6786 
6787   public:
6788     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6789         : MapDecl(MapDecl), MapExpr(MapExpr) {}
6790 
6791     const ValueDecl *getMapDecl() const { return MapDecl; }
6792     const Expr *getMapExpr() const { return MapExpr; }
6793   };
6794 
  // Convenience aliases for the map-related array types provided by the
  // OpenMPIRBuilder, plus clang-side companions.
  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  // Base values intentionally share the representation of plain map values.
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  // Per-entry debug expressions and mapper declarations tracked on the
  // clang side.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
  // Everything recorded for one map-clause component list: the components,
  // the map kind and modifiers, whether the map is implicit, the mapper
  // declaration (if any), and the original expression.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
                 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
  using MapDataArrayTy = SmallVector<MapData, 4>;
6809 
6810   /// This structure contains combined information generated for mappable
6811   /// clauses, including base pointers, pointers, sizes, map types, user-defined
6812   /// mappers, and non-contiguous information.
6813   struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6814     MapExprsArrayTy Exprs;
6815     MapValueDeclsArrayTy Mappers;
6816     MapValueDeclsArrayTy DevicePtrDecls;
6817 
6818     /// Append arrays in \a CurInfo.
6819     void append(MapCombinedInfoTy &CurInfo) {
6820       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6821       DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6822                             CurInfo.DevicePtrDecls.end());
6823       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6824       llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6825     }
6826   };
6827 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping information gathered for the struct before the combined
    /// entry itself is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range.
    Address LB = Address::invalid();
    /// True if the lowest element was produced by an array section.
    bool IsArraySection = false;
    /// True if the whole record is mapped rather than a sub-range.
    bool HasCompleteRecord = false;
  };
6843 
6844 private:
  /// All information associated with one mappable-expression component list:
  /// the components themselves, the map kind and its modifiers, and flags
  /// describing how the resulting entry must be emitted.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether the device address/pointer of this entry must be returned
    // (use_device_ptr / use_device_addr handling).
    bool ReturnDevicePointer = false;
    // True if the map was introduced implicitly rather than written by the
    // user.
    bool IsImplicit = false;
    // User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable-reference expression for this entry.
    const Expr *VarRef = nullptr;
    // True when the entry originates from a device-address clause rather
    // than a device-pointer clause.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6871 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression whose address is captured for the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration the clause refers to.
    const ValueDecl *VD = nullptr;
    /// True when the entry comes from use_device_addr, false for
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6884 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and the map clause that maps them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6915 
  /// Compute the number of bytes to be mapped for expression \p E, emitting
  /// any IR needed to evaluate dynamic extents. Array shaping expressions
  /// and array sections are measured by their section length; everything
  /// else falls back to the size of the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size: pointee size for pointer bases, element size for
      // array bases.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when the lower-bound offset exceeds the base size
      // instead of producing a wrapped-around unsigned value.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
6990 
6991   /// Return the corresponding bits for a given map clause modifier. Add
6992   /// a flag marking the map as a pointer if requested. Add a flag marking the
6993   /// map as the first one of a series of maps that relate to the same map
6994   /// expression.
6995   OpenMPOffloadMappingFlags getMapTypeBits(
6996       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6997       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6998       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6999     OpenMPOffloadMappingFlags Bits =
7000         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7001                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7002     switch (MapType) {
7003     case OMPC_MAP_alloc:
7004     case OMPC_MAP_release:
7005       // alloc and release is the default behavior in the runtime library,  i.e.
7006       // if we don't pass any bits alloc/release that is what the runtime is
7007       // going to do. Therefore, we don't need to signal anything for these two
7008       // type modifiers.
7009       break;
7010     case OMPC_MAP_to:
7011       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7012       break;
7013     case OMPC_MAP_from:
7014       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7015       break;
7016     case OMPC_MAP_tofrom:
7017       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7018               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7019       break;
7020     case OMPC_MAP_delete:
7021       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7022       break;
7023     case OMPC_MAP_unknown:
7024       llvm_unreachable("Unexpected map type!");
7025     }
7026     if (AddPtrFlag)
7027       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7028     if (AddIsTargetParamFlag)
7029       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7030     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7031       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7032     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7033       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7034     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7035         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7036       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7037     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7038       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7039     if (IsNonContiguous)
7040       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7041     return Bits;
7042   }
7043 
7044   /// Return true if the provided expression is a final array section. A
7045   /// final array section, is one whose length can't be proved to be one.
7046   bool isFinalArraySectionExpression(const Expr *E) const {
7047     const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7048 
7049     // It is not an array section and therefore not a unity-size one.
7050     if (!OASE)
7051       return false;
7052 
7053     // An array section with no colon always refer to a single element.
7054     if (OASE->getColonLocFirst().isInvalid())
7055       return false;
7056 
7057     const Expr *Length = OASE->getLength();
7058 
7059     // If we don't have a length we have to check if the array has size 1
7060     // for this dimension. Also, we should always expect a length if the
7061     // base type is pointer.
7062     if (!Length) {
7063       QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7064                              OASE->getBase()->IgnoreParenImpCasts())
7065                              .getCanonicalType();
7066       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7067         return ATy->getSExtSize() != 1;
7068       // If we don't have a constant dimension length, we have to consider
7069       // the current section as having any size, so it is not necessarily
7070       // unitary. If it happen to be unity size, that's user fault.
7071       return true;
7072     }
7073 
7074     // Check if the length evaluates to 1.
7075     Expr::EvalResult Result;
7076     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7077       return true; // Can have more that size 1.
7078 
7079     llvm::APSInt ConstLength = Result.Val.getInt();
7080     return ConstLength.getSExtValue() != 1;
7081   }
7082 
7083   /// Generate the base pointers, section pointers, sizes, map type bits, and
7084   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7085   /// map type, map or motion modifiers, and expression components.
7086   /// \a IsFirstComponent should be set to true if the provided set of
7087   /// components is the first associated with a capture.
7088   void generateInfoForComponentList(
7089       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7090       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7091       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7092       MapCombinedInfoTy &CombinedInfo,
7093       MapCombinedInfoTy &StructBaseCombinedInfo,
7094       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7095       bool IsImplicit, bool GenerateAllInfoForClauses,
7096       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7097       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7098       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7099           OverlappedElements = {},
7100       bool AreBothBasePtrAndPteeMapped = false) const {
7101     // The following summarizes what has to be generated for each map and the
7102     // types below. The generated information is expressed in this order:
7103     // base pointer, section pointer, size, flags
7104     // (to add to the ones that come from the map type and modifier).
7105     //
7106     // double d;
7107     // int i[100];
7108     // float *p;
7109     // int **a = &i;
7110     //
7111     // struct S1 {
7112     //   int i;
7113     //   float f[50];
7114     // }
7115     // struct S2 {
7116     //   int i;
7117     //   float f[50];
7118     //   S1 s;
7119     //   double *p;
7120     //   struct S2 *ps;
7121     //   int &ref;
7122     // }
7123     // S2 s;
7124     // S2 *ps;
7125     //
7126     // map(d)
7127     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7128     //
7129     // map(i)
7130     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7131     //
7132     // map(i[1:23])
7133     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7134     //
7135     // map(p)
7136     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7137     //
7138     // map(p[1:24])
7139     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7140     // in unified shared memory mode or for local pointers
7141     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7142     //
7143     // map((*a)[0:3])
7144     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7145     // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7146     //
7147     // map(**a)
7148     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7149     // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7150     //
7151     // map(s)
7152     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7153     //
7154     // map(s.i)
7155     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7156     //
7157     // map(s.s.f)
7158     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7159     //
7160     // map(s.p)
7161     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7162     //
7163     // map(to: s.p[:22])
7164     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7165     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7166     // &(s.p), &(s.p[0]), 22*sizeof(double),
7167     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7168     // (*) alloc space for struct members, only this is a target parameter
7169     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7170     //      optimizes this entry out, same in the examples below)
7171     // (***) map the pointee (map: to)
7172     //
7173     // map(to: s.ref)
7174     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7175     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7176     // (*) alloc space for struct members, only this is a target parameter
7177     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7178     //      optimizes this entry out, same in the examples below)
7179     // (***) map the pointee (map: to)
7180     //
7181     // map(s.ps)
7182     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7183     //
7184     // map(from: s.ps->s.i)
7185     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7186     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7187     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7188     //
7189     // map(to: s.ps->ps)
7190     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7191     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7192     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7193     //
7194     // map(s.ps->ps->ps)
7195     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7196     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7197     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7198     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7199     //
7200     // map(to: s.ps->ps->s.f[:22])
7201     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7202     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7203     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7204     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7205     //
7206     // map(ps)
7207     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7208     //
7209     // map(ps->i)
7210     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7211     //
7212     // map(ps->s.f)
7213     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7214     //
7215     // map(from: ps->p)
7216     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7217     //
7218     // map(to: ps->p[:22])
7219     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7220     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7221     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7222     //
7223     // map(ps->ps)
7224     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7225     //
7226     // map(from: ps->ps->s.i)
7227     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7228     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7229     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7230     //
7231     // map(from: ps->ps->ps)
7232     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7233     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7234     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7235     //
7236     // map(ps->ps->ps->ps)
7237     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7238     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7239     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7240     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7241     //
7242     // map(to: ps->ps->ps->s.f[:22])
7243     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7244     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7245     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7246     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7247     //
7248     // map(to: s.f[:22]) map(from: s.p[:33])
7249     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7250     //     sizeof(double*) (**), TARGET_PARAM
7251     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7252     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7253     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7254     // (*) allocate contiguous space needed to fit all mapped members even if
7255     //     we allocate space for members not mapped (in this example,
7256     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7257     //     them as well because they fall between &s.f[0] and &s.p)
7258     //
7259     // map(from: s.f[:22]) map(to: ps->p[:33])
7260     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7261     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7262     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7263     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7264     // (*) the struct this entry pertains to is the 2nd element in the list of
7265     //     arguments, hence MEMBER_OF(2)
7266     //
7267     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7268     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7269     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7270     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7271     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7272     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7273     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7274     // (*) the struct this entry pertains to is the 4th element in the list
7275     //     of arguments, hence MEMBER_OF(4)
7276     //
7277     // map(p, p[:100])
7278     // ===> map(p[:100])
7279     // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7280 
7281     // Track if the map information being generated is the first for a capture.
7282     bool IsCaptureFirstInfo = IsFirstComponentList;
7283     // When the variable is on a declare target link or in a to clause with
7284     // unified memory, a reference is needed to hold the host/device address
7285     // of the variable.
7286     bool RequiresReference = false;
7287 
7288     // Scan the components from the base to the complete expression.
7289     auto CI = Components.rbegin();
7290     auto CE = Components.rend();
7291     auto I = CI;
7292 
7293     // Track if the map information being generated is the first for a list of
7294     // components.
7295     bool IsExpressionFirstInfo = true;
7296     bool FirstPointerInComplexData = false;
7297     Address BP = Address::invalid();
7298     const Expr *AssocExpr = I->getAssociatedExpression();
7299     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7300     const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7301     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7302 
7303     if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7304       return;
7305     if (isa<MemberExpr>(AssocExpr)) {
7306       // The base is the 'this' pointer. The content of the pointer is going
7307       // to be the base of the field being mapped.
7308       BP = CGF.LoadCXXThisAddress();
7309     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7310                (OASE &&
7311                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7312       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7313     } else if (OAShE &&
7314                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7315       BP = Address(
7316           CGF.EmitScalarExpr(OAShE->getBase()),
7317           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7318           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7319     } else {
7320       // The base is the reference to the variable.
7321       // BP = &Var.
7322       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7323       if (const auto *VD =
7324               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7325         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7326                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7327           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7328               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7329                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7330                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7331             RequiresReference = true;
7332             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7333           }
7334         }
7335       }
7336 
7337       // If the variable is a pointer and is being dereferenced (i.e. is not
7338       // the last component), the base has to be the pointer itself, not its
7339       // reference. References are ignored for mapping purposes.
7340       QualType Ty =
7341           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7342       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7343         // No need to generate individual map information for the pointer, it
7344         // can be associated with the combined storage if shared memory mode is
7345         // active or the base declaration is not global variable.
7346         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7347         if (!AreBothBasePtrAndPteeMapped &&
7348             (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7349              !VD || VD->hasLocalStorage()))
7350           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7351         else
7352           FirstPointerInComplexData = true;
7353         ++I;
7354       }
7355     }
7356 
7357     // Track whether a component of the list should be marked as MEMBER_OF some
7358     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7359     // in a component list should be marked as MEMBER_OF, all subsequent entries
7360     // do not belong to the base struct. E.g.
7361     // struct S2 s;
7362     // s.ps->ps->ps->f[:]
7363     //   (1) (2) (3) (4)
7364     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7365     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7366     // is the pointee of ps(2) which is not member of struct s, so it should not
7367     // be marked as such (it is still PTR_AND_OBJ).
7368     // The variable is initialized to false so that PTR_AND_OBJ entries which
7369     // are not struct members are not considered (e.g. array of pointers to
7370     // data).
7371     bool ShouldBeMemberOf = false;
7372 
7373     // Variable keeping track of whether or not we have encountered a component
7374     // in the component list which is a member expression. Useful when we have a
7375     // pointer or a final array section, in which case it is the previous
7376     // component in the list which tells us whether we have a member expression.
7377     // E.g. X.f[:]
7378     // While processing the final array section "[:]" it is "f" which tells us
7379     // whether we are dealing with a member of a declared struct.
7380     const MemberExpr *EncounteredME = nullptr;
7381 
7382     // Track the total number of dimensions. Start from one for the dummy
7383     // dimension.
7384     uint64_t DimSize = 1;
7385 
7386     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7387     bool IsPrevMemberReference = false;
7388 
7389     bool IsPartialMapped =
7390         !PartialStruct.PreliminaryMapData.BasePointers.empty();
7391 
7392     // We need to check if we will be encountering any MEs. If we do not
7393     // encounter any ME expression it means we will be mapping the whole struct.
7394     // In that case we need to skip adding an entry for the struct to the
7395     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7396     // list only when generating all info for clauses.
7397     bool IsMappingWholeStruct = true;
7398     if (!GenerateAllInfoForClauses) {
7399       IsMappingWholeStruct = false;
7400     } else {
7401       for (auto TempI = I; TempI != CE; ++TempI) {
7402         const MemberExpr *PossibleME =
7403             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7404         if (PossibleME) {
7405           IsMappingWholeStruct = false;
7406           break;
7407         }
7408       }
7409     }
7410 
7411     for (; I != CE; ++I) {
7412       // If the current component is member of a struct (parent struct) mark it.
7413       if (!EncounteredME) {
7414         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7415         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7416         // as MEMBER_OF the parent struct.
7417         if (EncounteredME) {
7418           ShouldBeMemberOf = true;
7419           // Do not emit as complex pointer if this is actually not array-like
7420           // expression.
7421           if (FirstPointerInComplexData) {
7422             QualType Ty = std::prev(I)
7423                               ->getAssociatedDeclaration()
7424                               ->getType()
7425                               .getNonReferenceType();
7426             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7427             FirstPointerInComplexData = false;
7428           }
7429         }
7430       }
7431 
7432       auto Next = std::next(I);
7433 
7434       // We need to generate the addresses and sizes if this is the last
7435       // component, if the component is a pointer or if it is an array section
7436       // whose length can't be proved to be one. If this is a pointer, it
7437       // becomes the base address for the following components.
7438 
7439       // A final array section, is one whose length can't be proved to be one.
7440       // If the map item is non-contiguous then we don't treat any array section
7441       // as final array section.
7442       bool IsFinalArraySection =
7443           !IsNonContiguous &&
7444           isFinalArraySectionExpression(I->getAssociatedExpression());
7445 
7446       // If we have a declaration for the mapping use that, otherwise use
7447       // the base declaration of the map clause.
7448       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7449                                      ? I->getAssociatedDeclaration()
7450                                      : BaseDecl;
7451       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7452                                                : MapExpr;
7453 
7454       // Get information on whether the element is a pointer. Have to do a
7455       // special treatment for array sections given that they are built-in
7456       // types.
7457       const auto *OASE =
7458           dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7459       const auto *OAShE =
7460           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7461       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7462       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7463       bool IsPointer =
7464           OAShE ||
7465           (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7466                        .getCanonicalType()
7467                        ->isAnyPointerType()) ||
7468           I->getAssociatedExpression()->getType()->isAnyPointerType();
7469       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7470                                MapDecl &&
7471                                MapDecl->getType()->isLValueReferenceType();
7472       bool IsNonDerefPointer = IsPointer &&
7473                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7474                                !IsNonContiguous;
7475 
7476       if (OASE)
7477         ++DimSize;
7478 
7479       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7480           IsFinalArraySection) {
7481         // If this is not the last component, we expect the pointer to be
7482         // associated with an array expression or member expression.
7483         assert((Next == CE ||
7484                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7485                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7486                 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7487                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7488                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7489                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7490                "Unexpected expression");
7491 
7492         Address LB = Address::invalid();
7493         Address LowestElem = Address::invalid();
7494         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7495                                        const MemberExpr *E) {
7496           const Expr *BaseExpr = E->getBase();
7497           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7498           // scalar.
7499           LValue BaseLV;
7500           if (E->isArrow()) {
7501             LValueBaseInfo BaseInfo;
7502             TBAAAccessInfo TBAAInfo;
7503             Address Addr =
7504                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7505             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7506             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7507           } else {
7508             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7509           }
7510           return BaseLV;
7511         };
7512         if (OAShE) {
7513           LowestElem = LB =
7514               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7515                       CGF.ConvertTypeForMem(
7516                           OAShE->getBase()->getType()->getPointeeType()),
7517                       CGF.getContext().getTypeAlignInChars(
7518                           OAShE->getBase()->getType()));
7519         } else if (IsMemberReference) {
7520           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7521           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7522           LowestElem = CGF.EmitLValueForFieldInitialization(
7523                               BaseLVal, cast<FieldDecl>(MapDecl))
7524                            .getAddress();
7525           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7526                    .getAddress();
7527         } else {
7528           LowestElem = LB =
7529               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7530                   .getAddress();
7531         }
7532 
7533         // If this component is a pointer inside the base struct then we don't
7534         // need to create any entry for it - it will be combined with the object
7535         // it is pointing to into a single PTR_AND_OBJ entry.
7536         bool IsMemberPointerOrAddr =
7537             EncounteredME &&
7538             (((IsPointer || ForDeviceAddr) &&
7539               I->getAssociatedExpression() == EncounteredME) ||
7540              (IsPrevMemberReference && !IsPointer) ||
7541              (IsMemberReference && Next != CE &&
7542               !Next->getAssociatedExpression()->getType()->isPointerType()));
7543         if (!OverlappedElements.empty() && Next == CE) {
7544           // Handle base element with the info for overlapped elements.
7545           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7546           assert(!IsPointer &&
7547                  "Unexpected base element with the pointer type.");
7548           // Mark the whole struct as the struct that requires allocation on the
7549           // device.
7550           PartialStruct.LowestElem = {0, LowestElem};
7551           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7552               I->getAssociatedExpression()->getType());
7553           Address HB = CGF.Builder.CreateConstGEP(
7554               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7555                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7556               TypeSize.getQuantity() - 1);
7557           PartialStruct.HighestElem = {
7558               std::numeric_limits<decltype(
7559                   PartialStruct.HighestElem.first)>::max(),
7560               HB};
7561           PartialStruct.Base = BP;
7562           PartialStruct.LB = LB;
7563           assert(
7564               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7565               "Overlapped elements must be used only once for the variable.");
7566           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7567           // Emit data for non-overlapped data.
7568           OpenMPOffloadMappingFlags Flags =
7569               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7570               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7571                              /*AddPtrFlag=*/false,
7572                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7573           llvm::Value *Size = nullptr;
7574           // Do bitcopy of all non-overlapped structure elements.
7575           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7576                    Component : OverlappedElements) {
7577             Address ComponentLB = Address::invalid();
7578             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7579                  Component) {
7580               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7581                 const auto *FD = dyn_cast<FieldDecl>(VD);
7582                 if (FD && FD->getType()->isLValueReferenceType()) {
7583                   const auto *ME =
7584                       cast<MemberExpr>(MC.getAssociatedExpression());
7585                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7586                   ComponentLB =
7587                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7588                           .getAddress();
7589                 } else {
7590                   ComponentLB =
7591                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7592                           .getAddress();
7593                 }
7594                 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7595                 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7596                 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7597                                                  LBPtr);
7598                 break;
7599               }
7600             }
7601             assert(Size && "Failed to determine structure size");
7602             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7603             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7604             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7605             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7606             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7607             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7608                 Size, CGF.Int64Ty, /*isSigned=*/true));
7609             CombinedInfo.Types.push_back(Flags);
7610             CombinedInfo.Mappers.push_back(nullptr);
7611             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7612                                                                       : 1);
7613             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7614           }
7615           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7616           CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7617           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7618           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7619           CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7620           llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7621           Size = CGF.Builder.CreatePtrDiff(
7622               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7623               LBPtr);
7624           CombinedInfo.Sizes.push_back(
7625               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7626           CombinedInfo.Types.push_back(Flags);
7627           CombinedInfo.Mappers.push_back(nullptr);
7628           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7629                                                                     : 1);
7630           break;
7631         }
7632         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7633         // Skip adding an entry in the CurInfo of this combined entry if the
7634         // whole struct is currently being mapped. The struct needs to be added
7635         // in the first position before any data internal to the struct is being
7636         // mapped.
7637         // Skip adding an entry in the CurInfo of this combined entry if the
7638         // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
7639         if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7640             (Next == CE && MapType != OMPC_MAP_unknown)) {
7641           if (!IsMappingWholeStruct) {
7642             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7643             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7644             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7645             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7646             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7647             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7648                 Size, CGF.Int64Ty, /*isSigned=*/true));
7649             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7650                                                                       : 1);
7651           } else {
7652             StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7653             StructBaseCombinedInfo.BasePointers.push_back(
7654                 BP.emitRawPointer(CGF));
7655             StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7656             StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7657             StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7658             StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7659                 Size, CGF.Int64Ty, /*isSigned=*/true));
7660             StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7661                 IsNonContiguous ? DimSize : 1);
7662           }
7663 
7664           // If Mapper is valid, the last component inherits the mapper.
7665           bool HasMapper = Mapper && Next == CE;
7666           if (!IsMappingWholeStruct)
7667             CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7668           else
7669             StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7670                                                                : nullptr);
7671 
7672           // We need to add a pointer flag for each map that comes from the
7673           // same expression except for the first one. We also need to signal
7674           // this map is the first one that relates with the current capture
7675           // (there is a set of entries for each capture).
7676           OpenMPOffloadMappingFlags Flags =
7677               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7678                              !IsExpressionFirstInfo || RequiresReference ||
7679                                  FirstPointerInComplexData || IsMemberReference,
7680                              AreBothBasePtrAndPteeMapped ||
7681                                  (IsCaptureFirstInfo && !RequiresReference),
7682                              IsNonContiguous);
7683 
7684           if (!IsExpressionFirstInfo || IsMemberReference) {
7685             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7686             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7687             if (IsPointer || (IsMemberReference && Next != CE))
7688               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7689                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7690                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7691                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7692                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7693 
7694             if (ShouldBeMemberOf) {
7695               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696               // should be later updated with the correct value of MEMBER_OF.
7697               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7698               // From now on, all subsequent PTR_AND_OBJ entries should not be
7699               // marked as MEMBER_OF.
7700               ShouldBeMemberOf = false;
7701             }
7702           }
7703 
7704           if (!IsMappingWholeStruct)
7705             CombinedInfo.Types.push_back(Flags);
7706           else
7707             StructBaseCombinedInfo.Types.push_back(Flags);
7708         }
7709 
7710         // If we have encountered a member expression so far, keep track of the
7711         // mapped member. If the parent is "*this", then the value declaration
7712         // is nullptr.
7713         if (EncounteredME) {
7714           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7715           unsigned FieldIndex = FD->getFieldIndex();
7716 
7717           // Update info about the lowest and highest elements for this struct
7718           if (!PartialStruct.Base.isValid()) {
7719             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7720             if (IsFinalArraySection && OASE) {
7721               Address HB =
7722                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7723                       .getAddress();
7724               PartialStruct.HighestElem = {FieldIndex, HB};
7725             } else {
7726               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7727             }
7728             PartialStruct.Base = BP;
7729             PartialStruct.LB = BP;
7730           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7731             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7732           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7733             if (IsFinalArraySection && OASE) {
7734               Address HB =
7735                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7736                       .getAddress();
7737               PartialStruct.HighestElem = {FieldIndex, HB};
7738             } else {
7739               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7740             }
7741           }
7742         }
7743 
7744         // Need to emit combined struct for array sections.
7745         if (IsFinalArraySection || IsNonContiguous)
7746           PartialStruct.IsArraySection = true;
7747 
7748         // If we have a final array section, we are done with this expression.
7749         if (IsFinalArraySection)
7750           break;
7751 
7752         // The pointer becomes the base for the next element.
7753         if (Next != CE)
7754           BP = IsMemberReference ? LowestElem : LB;
7755         if (!IsPartialMapped)
7756           IsExpressionFirstInfo = false;
7757         IsCaptureFirstInfo = false;
7758         FirstPointerInComplexData = false;
7759         IsPrevMemberReference = IsMemberReference;
7760       } else if (FirstPointerInComplexData) {
7761         QualType Ty = Components.rbegin()
7762                           ->getAssociatedDeclaration()
7763                           ->getType()
7764                           .getNonReferenceType();
7765         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7766         FirstPointerInComplexData = false;
7767       }
7768     }
7769     // If ran into the whole component - allocate the space for the whole
7770     // record.
7771     if (!EncounteredME)
7772       PartialStruct.HasCompleteRecord = true;
7773 
7774     if (!IsNonContiguous)
7775       return;
7776 
7777     const ASTContext &Context = CGF.getContext();
7778 
7779     // For supporting stride in array section, we need to initialize the first
7780     // dimension size as 1, first offset as 0, and first count as 1
7781     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7782     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7783     MapValuesArrayTy CurStrides;
7784     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7785     uint64_t ElementTypeSize;
7786 
7787     // Collect Size information for each dimension and get the element size as
7788     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7789     // should be [10, 10] and the first stride is 4 bytes.
7790     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7791          Components) {
7792       const Expr *AssocExpr = Component.getAssociatedExpression();
7793       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7794 
7795       if (!OASE)
7796         continue;
7797 
7798       QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7799       auto *CAT = Context.getAsConstantArrayType(Ty);
7800       auto *VAT = Context.getAsVariableArrayType(Ty);
7801 
7802       // We need all the dimension size except for the last dimension.
7803       assert((VAT || CAT || &Component == &*Components.begin()) &&
7804              "Should be either ConstantArray or VariableArray if not the "
7805              "first Component");
7806 
7807       // Get element size if CurStrides is empty.
7808       if (CurStrides.empty()) {
7809         const Type *ElementType = nullptr;
7810         if (CAT)
7811           ElementType = CAT->getElementType().getTypePtr();
7812         else if (VAT)
7813           ElementType = VAT->getElementType().getTypePtr();
7814         else
7815           assert(&Component == &*Components.begin() &&
7816                  "Only expect pointer (non CAT or VAT) when this is the "
7817                  "first Component");
7818         // If ElementType is null, then it means the base is a pointer
7819         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7820         // for next iteration.
7821         if (ElementType) {
7822           // For the case that having pointer as base, we need to remove one
7823           // level of indirection.
7824           if (&Component != &*Components.begin())
7825             ElementType = ElementType->getPointeeOrArrayElementType();
7826           ElementTypeSize =
7827               Context.getTypeSizeInChars(ElementType).getQuantity();
7828           CurStrides.push_back(
7829               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7830         }
7831       }
7832       // Get dimension value except for the last dimension since we don't need
7833       // it.
7834       if (DimSizes.size() < Components.size() - 1) {
7835         if (CAT)
7836           DimSizes.push_back(
7837               llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7838         else if (VAT)
7839           DimSizes.push_back(CGF.Builder.CreateIntCast(
7840               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7841               /*IsSigned=*/false));
7842       }
7843     }
7844 
7845     // Skip the dummy dimension since we already have its information.
7846     auto *DI = DimSizes.begin() + 1;
7847     // Product of dimension.
7848     llvm::Value *DimProd =
7849         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7850 
7851     // Collect info for non-contiguous. Notice that offset, count, and stride
7852     // are only meaningful for array-section, so we insert a null for anything
7853     // other than array-section.
7854     // Also, the size of offset, count, and stride are not the same as
7855     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7856     // count, and stride are the same as the number of non-contiguous
7857     // declaration in target update to/from clause.
7858     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7859          Components) {
7860       const Expr *AssocExpr = Component.getAssociatedExpression();
7861 
7862       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7863         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7864             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7865             /*isSigned=*/false);
7866         CurOffsets.push_back(Offset);
7867         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7868         CurStrides.push_back(CurStrides.back());
7869         continue;
7870       }
7871 
7872       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7873 
7874       if (!OASE)
7875         continue;
7876 
7877       // Offset
7878       const Expr *OffsetExpr = OASE->getLowerBound();
7879       llvm::Value *Offset = nullptr;
7880       if (!OffsetExpr) {
7881         // If offset is absent, then we just set it to zero.
7882         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7883       } else {
7884         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7885                                            CGF.Int64Ty,
7886                                            /*isSigned=*/false);
7887       }
7888       CurOffsets.push_back(Offset);
7889 
7890       // Count
7891       const Expr *CountExpr = OASE->getLength();
7892       llvm::Value *Count = nullptr;
7893       if (!CountExpr) {
7894         // In Clang, once a high dimension is an array section, we construct all
7895         // the lower dimension as array section, however, for case like
7896         // arr[0:2][2], Clang construct the inner dimension as an array section
7897         // but it actually is not in an array section form according to spec.
7898         if (!OASE->getColonLocFirst().isValid() &&
7899             !OASE->getColonLocSecond().isValid()) {
7900           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7901         } else {
7902           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7903           // When the length is absent it defaults to ⌈(size −
7904           // lower-bound)/stride⌉, where size is the size of the array
7905           // dimension.
7906           const Expr *StrideExpr = OASE->getStride();
7907           llvm::Value *Stride =
7908               StrideExpr
7909                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7910                                               CGF.Int64Ty, /*isSigned=*/false)
7911                   : nullptr;
7912           if (Stride)
7913             Count = CGF.Builder.CreateUDiv(
7914                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7915           else
7916             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7917         }
7918       } else {
7919         Count = CGF.EmitScalarExpr(CountExpr);
7920       }
7921       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7922       CurCounts.push_back(Count);
7923 
7924       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7925       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7926       //              Offset      Count     Stride
7927       //    D0          0           1         4    (int)    <- dummy dimension
7928       //    D1          0           2         8    (2 * (1) * 4)
7929       //    D2          1           2         20   (1 * (1 * 5) * 4)
7930       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7931       const Expr *StrideExpr = OASE->getStride();
7932       llvm::Value *Stride =
7933           StrideExpr
7934               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7935                                           CGF.Int64Ty, /*isSigned=*/false)
7936               : nullptr;
7937       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7938       if (Stride)
7939         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7940       else
7941         CurStrides.push_back(DimProd);
7942       if (DI != DimSizes.end())
7943         ++DI;
7944     }
7945 
7946     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7947     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7948     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7949   }
7950 
7951   /// Return the adjusted map modifiers if the declaration a capture refers to
7952   /// appears in a first-private clause. This is expected to be used only with
7953   /// directives that start with 'target'.
7954   OpenMPOffloadMappingFlags
7955   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7956     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7957 
7958     // A first private variable captured by reference will use only the
7959     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7960     // declaration is known as first-private in this handler.
7961     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7962       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7963         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7964                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7965       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7966              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7967     }
7968     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7969     if (I != LambdasMap.end())
7970       // for map(to: lambda): using user specified map type.
7971       return getMapTypeBits(
7972           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7973           /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7974           /*AddPtrFlag=*/false,
7975           /*AddIsTargetParamFlag=*/false,
7976           /*isNonContiguous=*/false);
7977     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7978            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7979   }
7980 
7981   void getPlainLayout(const CXXRecordDecl *RD,
7982                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7983                       bool AsBase) const {
7984     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7985 
7986     llvm::StructType *St =
7987         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7988 
7989     unsigned NumElements = St->getNumElements();
7990     llvm::SmallVector<
7991         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7992         RecordLayout(NumElements);
7993 
7994     // Fill bases.
7995     for (const auto &I : RD->bases()) {
7996       if (I.isVirtual())
7997         continue;
7998 
7999       QualType BaseTy = I.getType();
8000       const auto *Base = BaseTy->getAsCXXRecordDecl();
8001       // Ignore empty bases.
8002       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8003           CGF.getContext()
8004               .getASTRecordLayout(Base)
8005               .getNonVirtualSize()
8006               .isZero())
8007         continue;
8008 
8009       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8010       RecordLayout[FieldIndex] = Base;
8011     }
8012     // Fill in virtual bases.
8013     for (const auto &I : RD->vbases()) {
8014       QualType BaseTy = I.getType();
8015       // Ignore empty bases.
8016       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8017         continue;
8018 
8019       const auto *Base = BaseTy->getAsCXXRecordDecl();
8020       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8021       if (RecordLayout[FieldIndex])
8022         continue;
8023       RecordLayout[FieldIndex] = Base;
8024     }
8025     // Fill in all the fields.
8026     assert(!RD->isUnion() && "Unexpected union.");
8027     for (const auto *Field : RD->fields()) {
8028       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8029       // will fill in later.)
8030       if (!Field->isBitField() &&
8031           !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8032         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8033         RecordLayout[FieldIndex] = Field;
8034       }
8035     }
8036     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8037              &Data : RecordLayout) {
8038       if (Data.isNull())
8039         continue;
8040       if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8041         getPlainLayout(Base, Layout, /*AsBase=*/true);
8042       else
8043         Layout.push_back(cast<const FieldDecl *>(Data));
8044     }
8045   }
8046 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// \param Clauses Clauses of the current directive (or declare mapper).
  /// \param CombinedInfo Output aggregate the generated entries are appended
  ///        to.
  /// \param OMPBuilder Used when emitting the combined entry for a partially
  ///        mapped struct (MEMBER_OF flag computation).
  /// \param SkipVarSet Declarations whose component lists must be ignored.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.try_emplace(D, Total).first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect the component lists of all map clauses, classified by whether
    // they carry the 'present' modifier or use map type 'alloc'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // 'to' motion clauses contribute entries with map type 'to'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // 'from' motion clauses contribute entries with map type 'from'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user fault if that
    // was not mapped before. If there is no map information and the pointer is
    // a struct member, then we defer the emission of that entry until the whole
    // struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Appends a RETURN_PARAM entry (zero size) for a use_device_ptr /
    // use_device_addr item that has no corresponding map entry.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
                    /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
                    IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    // Same for use_device_addr; Processed guards against handling the same
    // declaration more than once across component lists.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    // Emit the collected map information, one declaration's chunk at a time.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ {},
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work on
            // the first new entry added to it i.e. make sure that when multiple
            // values are added to any of the lists, the first value added is
            // being modified by the assignments below (not the last value
            // added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
8445 
8446 public:
8447   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8448       : CurDir(&Dir), CGF(CGF) {
8449     // Extract firstprivate clause information.
8450     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8451       for (const auto *D : C->varlist())
8452         FirstPrivateDecls.try_emplace(
8453             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8454     // Extract implicit firstprivates from uses_allocators clauses.
8455     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8456       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8457         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8458         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8459           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8460                                         /*Implicit=*/true);
8461         else if (const auto *VD = dyn_cast<VarDecl>(
8462                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8463                          ->getDecl()))
8464           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8465       }
8466     }
8467     // Extract device pointer clause information.
8468     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8469       for (auto L : C->component_lists())
8470         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8471     // Extract device addr clause information.
8472     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8473       for (auto L : C->component_lists())
8474         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8475     // Extract map information.
8476     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8477       if (C->getMapType() != OMPC_MAP_to)
8478         continue;
8479       for (auto L : C->component_lists()) {
8480         const ValueDecl *VD = std::get<0>(L);
8481         const auto *RD = VD ? VD->getType()
8482                                   .getCanonicalType()
8483                                   .getNonReferenceType()
8484                                   ->getAsCXXRecordDecl()
8485                             : nullptr;
8486         if (RD && RD->isLambda())
8487           LambdasMap.try_emplace(std::get<0>(L), C);
8488       }
8489     }
8490   }
8491 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no clause information is pre-extracted here; the
  /// mapper's clauses are read later via generateAllInfoForMapper.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8495 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Aggregate the combined entry is appended to.
  /// \param CurTypes Map flags of the entries already generated for the
  ///        struct's members; adjusted in place (TARGET_PARAM removal,
  ///        OMPX_HOLD propagation, MEMBER_OF fix-up).
  /// \param PartialStruct Range information (base, lowest/highest element)
  ///        collected while mapping the individual members.
  /// \param IsMapThis True when the struct being mapped is the 'this' object.
  /// \param OMPBuilder Supplies the MEMBER_OF flag encoding helpers.
  /// \param VD The mapped declaration, if any.
  /// \param OffsetForMemberOfFlag Added to the base-pointer index when
  ///        computing the MEMBER_OF flag.
  /// \param NotTargetParams When false the combined entry may be marked
  ///        TARGET_PARAM (or PTR_AND_OBJ if preliminary map data exists).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         unsigned OffsetForMemberOfFlag = 0,
                         bool NotTargetParams = true) const {
    // No combined entry is needed for a single non-MEMBER_OF entry that is
    // not an array section.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    // With a complete record, the whole-struct address (LB) bounds the range
    // on both ends.
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8597 
8598   /// Generate all the base pointers, section pointers, sizes, map types, and
8599   /// mappers for the extracted mappable expressions (all included in \a
8600   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8601   /// pair of the relevant declaration and index where it occurs is appended to
8602   /// the device pointers info array.
8603   void generateAllInfo(
8604       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8605       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8606           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8607     assert(isa<const OMPExecutableDirective *>(CurDir) &&
8608            "Expect a executable directive");
8609     const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8610     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8611                               SkipVarSet);
8612   }
8613 
8614   /// Generate all the base pointers, section pointers, sizes, map types, and
8615   /// mappers for the extracted map clauses of user-defined mapper (all included
8616   /// in \a CombinedInfo).
8617   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8618                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8619     assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8620            "Expect a declare mapper directive");
8621     const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8622     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8623                               OMPBuilder);
8624   }
8625 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object, one PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT
  /// entry is produced for the captured `this` (if any) and one for each
  /// by-reference capture (or captured pointer), so the pointers stored inside
  /// the device copy of the lambda can be translated. For every entry, the
  /// host lambda address is recorded in \p LambdaPointers; it is used later by
  /// adjustMemberOfForLambdaCaptures to fix up the MEMBER_OF index.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda class objects are of interest here; anything else is a
    // no-op.
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    // Collect the lambda's capture fields, keyed by the captured variable.
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Captured `this`: the base is the field inside the lambda object, the
      // pointee is the stored `this` pointer value (pointer-sized entry).
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Only by-reference captures and captured pointers need translation.
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object using the size of
        // the non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: the loaded pointer value itself is the pointee;
        // use a zero size so no data is transferred for it.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8702 
8703   /// Set correct indices for lambdas captures.
8704   void adjustMemberOfForLambdaCaptures(
8705       llvm::OpenMPIRBuilder &OMPBuilder,
8706       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8707       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8708       MapFlagsArrayTy &Types) const {
8709     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8710       // Set correct member_of idx for all implicit lambda captures.
8711       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8712                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8713                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8714                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8715         continue;
8716       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8717       assert(BasePtr && "Unable to find base lambda address.");
8718       int TgtIdx = -1;
8719       for (unsigned J = I; J > 0; --J) {
8720         unsigned Idx = J - 1;
8721         if (Pointers[Idx] != BasePtr)
8722           continue;
8723         TgtIdx = Idx;
8724         break;
8725       }
8726       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8727       // All other current entries will be MEMBER_OF the combined entry
8728       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8729       // 0xFFFF in the MEMBER_OF field).
8730       OpenMPOffloadMappingFlags MemberOfFlag =
8731           OMPBuilder.getMemberOfFlag(TgtIdx);
8732       OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8733     }
8734   }
8735 
  /// For a capture that has an associated clause, generate the base pointers,
  /// section pointers, sizes, map types, and mappers (all included in
  /// \a CurCaptureVarInfo).
  ///
  /// Handles, in order: lambdas mapped with map(to:) (deferred to
  /// generateDefaultMapInfo), declarations from is_device_ptr/has_device_addr
  /// clauses (passed by value), and finally the component lists collected from
  /// the directive's map clauses.
  void generateInfoForCaptureFromClauseInfo(
      const CapturedStmt::Capture *Cap, llvm::Value *Arg,
      MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      unsigned OffsetForMemberOfFlag) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CurCaptureVarInfo.Exprs.push_back(VD);
      CurCaptureVarInfo.BasePointers.emplace_back(Arg);
      CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
      CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CurCaptureVarInfo.Pointers.push_back(Arg);
      // The entry is the pointer itself, passed by value (LITERAL) with
      // pointer size.
      CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CurCaptureVarInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CurCaptureVarInfo.Mappers.push_back(nullptr);
      return;
    }

    MapDataArrayTy DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/ true, nullptr,
                                        nullptr);
    // Likewise for member fields listed in has_device_addr, mapped tofrom.
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/ true, nullptr,
                                        nullptr);
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect a executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    // Collect every component list for VD from the directive's map clauses,
    // noting whether both the base pointer itself and an array section (or
    // subscript) of the pointee are mapped.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order lists so that 'present'-modified maps come first and 'alloc'
    // maps come last (stable otherwise).
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Generate the map info for one run of component lists, emitting the
    // extra combined struct entry when individual members were mapped.
    auto GenerateInfoForComponentLists =
        [&](ArrayRef<MapData> DeclComponentLists,
            bool IsEligibleForTargetParamFlag) {
          MapCombinedInfoTy CurInfoForComponentLists;
          StructRangeInfoTy PartialStruct;

          if (DeclComponentLists.empty())
            return;

          generateInfoForCaptureFromComponentLists(
              VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
              IsEligibleForTargetParamFlag,
              /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);

          // If there is an entry in PartialStruct it means we have a
          // struct with individual members mapped. Emit an extra combined
          // entry.
          if (PartialStruct.Base.isValid()) {
            CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
            emitCombinedEntry(
                CurCaptureVarInfo, CurInfoForComponentLists.Types,
                PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
                OffsetForMemberOfFlag,
                /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
          }

          // Return if we didn't add any entries.
          if (CurInfoForComponentLists.BasePointers.empty())
            return;

          CurCaptureVarInfo.append(CurInfoForComponentLists);
        };

    GenerateInfoForComponentLists(DeclComponentLists,
                                  /*IsEligibleForTargetParamFlag=*/true);
  }
8869 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// The routine first detects pairs of component lists that overlap (one is
  /// a prefix of the other), sorts each base's overlapped lists by record
  /// layout, then emits info for bases with overlaps followed by lists
  /// without overlaps. Only the very first emitted list may carry the
  /// TARGET_PARAM flag (when \a IsListEligibleForTargetParamFlag is true).
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      bool IsListEligibleForTargetParamFlag,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // Find overlapping elements (including the offset from the base element).
    // Maps a "base" component list to the lists overlapping with it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare each list L against every later list L1 (each pair once).
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from the base outwards while they agree on
        // expression kind and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // `It` points at the first divergent component of the longer list.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter list is the base; record the longer one as an
          // overlap of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the underlying record type,
      // whose field layout orders the overlapped lists.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: field index within the same
            // parent record, or the flattened record layout otherwise.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, {}, Components, CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
          /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      // Only the first emitted list may be a target parameter.
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, {}, Components, CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
      AddTargetParamFlag = false;
    }
  }
9043 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to \a CombinedInfo, always flagged as a
  /// TARGET_PARAM, and additionally as IMPLICIT unless a firstprivate clause
  /// recorded the capture as explicit in FirstPrivateDecls.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured `this`: map the pointed-to object tofrom, with the size of
      // the pointee type.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may override whether this map is implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: map the pointer value
        // loaded through the reference, not the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9125 };
9126 } // anonymous namespace
9127 
9128 // Try to extract the base declaration from a `this->x` expression if possible.
9129 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9130   if (!E)
9131     return nullptr;
9132 
9133   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9134     if (const MemberExpr *ME =
9135             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9136       return ME->getMemberDecl();
9137   return nullptr;
9138 }
9139 
9140 /// Emit a string constant containing the names of the values mapped to the
9141 /// offloading runtime library.
9142 static llvm::Constant *
9143 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9144                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9145 
9146   uint32_t SrcLocStrSize;
9147   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9148     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9149 
9150   SourceLocation Loc;
9151   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9152     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9153       Loc = VD->getLocation();
9154     else
9155       Loc = MapExprs.getMapExpr()->getExprLoc();
9156   } else {
9157     Loc = MapExprs.getMapDecl()->getLocation();
9158   }
9159 
9160   std::string ExprName;
9161   if (MapExprs.getMapExpr()) {
9162     PrintingPolicy P(CGF.getContext().getLangOpts());
9163     llvm::raw_string_ostream OS(ExprName);
9164     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9165   } else {
9166     ExprName = MapExprs.getMapDecl()->getNameAsString();
9167   }
9168 
9169   std::string FileName;
9170   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9171   if (auto *DbgInfo = CGF.getDebugInfo())
9172     FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9173   else
9174     FileName = PLoc.getFilename();
9175   return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9176                                          PLoc.getColumn(), SrcLocStrSize);
9177 }
9178 /// Emit the arrays used to pass the captures and map information to the
9179 /// offloading runtime library. If there is no map or capture information,
9180 /// return nullptr by reference.
9181 static void emitOffloadingArraysAndArgs(
9182     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9183     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9184     bool IsNonContiguous = false, bool ForEndCall = false) {
9185   CodeGenModule &CGM = CGF.CGM;
9186 
9187   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9188   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9189                          CGF.AllocaInsertPt->getIterator());
9190   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9191                           CGF.Builder.GetInsertPoint());
9192 
9193   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9194     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9195       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9196     }
9197   };
9198 
9199   auto CustomMapperCB = [&](unsigned int I) {
9200     llvm::Function *MFunc = nullptr;
9201     if (CombinedInfo.Mappers[I]) {
9202       Info.HasMapper = true;
9203       MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9204           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9205     }
9206     return MFunc;
9207   };
9208   cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9209       AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9210       IsNonContiguous, ForEndCall, DeviceAddrCB));
9211 }
9212 
/// Check for inner distribute directive.
///
/// Given a (combined) target directive \p D, look through its innermost
/// captured statement for a nested directive that carries the distribute
/// loop, and return it if found. Returns nullptr when no suitable nested
/// distribute directive exists.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // The candidate nested directive must be the single meaningful child of
  // the captured body.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        // 'target' with nested 'teams': descend one more level looking for
        // a distribute directive nested inside the teams region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may directly contain the distribute directive.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms never host a nested distribute directive.
      return nullptr;
    // Every other directive kind either cannot reach this helper or is a
    // combined form that already carries its own distribute loop; hitting
    // one of them here is a codegen invariant violation.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9324 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D   The 'declare mapper' declaration to emit a function for.
/// \param CGF If non-null, the function currently being generated; the new
///            mapper is then also recorded in FunctionUDMMap for it.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The variable declared by the mapper, privatized per array element below.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  CodeGenFunction MapperCGF(CGM);
  // Shared between the two callbacks: filled by PrivatizeAndGenMapInfoCB and
  // consulted by CustomMapperCB, which the builder invokes afterwards.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(MapperVarDecl, PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
    };
    // With debug info enabled, also record printable names for the mapped
    // expressions so the runtime can report them.
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };

  // Returns the nested mapper function for component I, or nullptr when the
  // component has none.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Name the mapper ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
      PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
  UDMMap.try_emplace(D, NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(D);
}
9425 
9426 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9427     const OMPDeclareMapperDecl *D) {
9428   auto I = UDMMap.find(D);
9429   if (I != UDMMap.end())
9430     return I->second;
9431   emitUserDefinedMapper(D);
9432   return UDMMap.lookup(D);
9433 }
9434 
9435 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9436     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9437     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9438                                      const OMPLoopDirective &D)>
9439         SizeEmitter) {
9440   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9441   const OMPExecutableDirective *TD = &D;
9442   // Get nested teams distribute kind directive, if any. For now, treat
9443   // 'target_teams_loop' as if it's really a target_teams_distribute.
9444   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9445       Kind != OMPD_target_teams_loop)
9446     TD = getNestedDistributeDirective(CGM.getContext(), D);
9447   if (!TD)
9448     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9449 
9450   const auto *LD = cast<OMPLoopDirective>(TD);
9451   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9452     return NumIterations;
9453   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9454 }
9455 
9456 static void
9457 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9458                        const OMPExecutableDirective &D,
9459                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9460                        bool RequiresOuterTask, const CapturedStmt &CS,
9461                        bool OffloadingMandatory, CodeGenFunction &CGF) {
9462   if (OffloadingMandatory) {
9463     CGF.Builder.CreateUnreachable();
9464   } else {
9465     if (RequiresOuterTask) {
9466       CapturedVars.clear();
9467       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9468     }
9469     OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9470                                          CapturedVars);
9471   }
9472 }
9473 
9474 static llvm::Value *emitDeviceID(
9475     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9476     CodeGenFunction &CGF) {
9477   // Emit device ID if any.
9478   llvm::Value *DeviceID;
9479   if (Device.getPointer()) {
9480     assert((Device.getInt() == OMPC_DEVICE_unknown ||
9481             Device.getInt() == OMPC_DEVICE_device_num) &&
9482            "Expected device_num modifier.");
9483     llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9484     DeviceID =
9485         CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9486   } else {
9487     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9488   }
9489   return DeviceID;
9490 }
9491 
9492 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9493                                       CodeGenFunction &CGF) {
9494   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9495 
9496   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9497     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9498     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9499         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9500     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9501                                              /*isSigned=*/false);
9502   }
9503   return DynCGroupMem;
9504 }
/// Generate map information for every variable captured by the target
/// region \p CS, appending it to \p CombinedInfo. Captured variables (other
/// than 'this') are also recorded in \p MappedVarSet so later passes can
/// skip them.
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // Three parallel sequences advanced in lock-step below: the captured
  // record's fields (RI), the captured values (CV), and the captures (CI).
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          CI, *CV, CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      // Record the capture so map-clause-only items are not mapped twice;
      // 'this' is represented by the nullptr key.
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);

      if (CurInfo.BasePointers.empty())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
}
9574 static void
9575 genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9576            MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9577            llvm::OpenMPIRBuilder &OMPBuilder,
9578            const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9579                llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9580 
9581   CodeGenModule &CGM = CGF.CGM;
9582   // Map any list items in a map clause that were not captures because they
9583   // weren't referenced within the construct.
9584   MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9585 
9586   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9587     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9588   };
9589   if (CGM.getCodeGenOpts().getDebugInfo() !=
9590       llvm::codegenoptions::NoDebugInfo) {
9591     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9592     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9593                     FillInfoMap);
9594   }
9595 }
9596 
9597 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9598                        const CapturedStmt &CS,
9599                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9600                        llvm::OpenMPIRBuilder &OMPBuilder,
9601                        MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9602   // Get mappable expression information.
9603   MappableExprsHandler MEHandler(D, CGF);
9604   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9605 
9606   genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9607                         MappedVarSet, CombinedInfo);
9608   genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9609 }
9610 
9611 template <typename ClauseTy>
9612 static void
9613 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9614                                  const OMPExecutableDirective &D,
9615                                  llvm::SmallVectorImpl<llvm::Value *> &Values) {
9616   const auto *C = D.getSingleClause<ClauseTy>();
9617   assert(!C->varlist_empty() &&
9618          "ompx_bare requires explicit num_teams and thread_limit");
9619   CodeGenFunction::RunCleanupsScope Scope(CGF);
9620   for (auto *E : C->varlist()) {
9621     llvm::Value *V = CGF.EmitScalarExpr(E);
9622     Values.push_back(
9623         CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9624   }
9625 }
9626 
/// Emit the offloading arrays for the target region and the guarded call to
/// the target kernel, falling back to the host version when the launch
/// fails. Fills \p InputInfo / \p MapTypesArray / \p MapNamesArray for use by
/// an enclosing task-based directive when \p RequiresOuterTask is set.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  // Publish the emitted arrays through the caller-provided out-parameters.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // The launch itself is deferred into a region-codegen lambda; out-params
  // are captured by reference because a task-based directive may re-enter it
  // with a fresh CodeGenFunction.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Invoked by the builder when the kernel launch fails at runtime.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // ompx_bare kernels take their team/thread counts straight from the
    // clauses; regular kernels derive them from the directive.
    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             NumThreads);
    } else {
      NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
            CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(AfterIP);
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
9737 
9738 static void
9739 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9740                    const OMPExecutableDirective &D,
9741                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9742                    bool RequiresOuterTask, const CapturedStmt &CS,
9743                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9744 
9745   // Notify that the host version must be executed.
9746   auto &&ElseGen =
9747       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9748        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9749         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9750                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9751       };
9752 
9753   if (RequiresOuterTask) {
9754     CodeGenFunction::OMPTargetDataInfo InputInfo;
9755     CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9756   } else {
9757     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9758   }
9759 }
9760 
/// Emit the body of a target directive: evaluate the captured variables,
/// then either launch the device kernel (with host fallback) or run the
/// host version directly, honoring an 'if' clause and mandatory offloading.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Offloading is only mandatory on the host side of the compilation.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // Clauses such as depend/nowait/in_reduction (and, from OpenMP 5.1,
  // thread_limit on applicable directives) force the target region to be
  // wrapped in an outer task.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Evaluate the captured variables up front; both branches below use them.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled by emitTargetCallKernelLaunch and consumed when the launch runs
  // inside an outer task; captured by reference in TargetThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9829 
9830 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9831                                                     StringRef ParentName) {
9832   if (!S)
9833     return;
9834 
9835   // Codegen OMP target directives that offload compute to the device.
9836   bool RequiresDeviceCodegen =
9837       isa<OMPExecutableDirective>(S) &&
9838       isOpenMPTargetExecutionDirective(
9839           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9840 
9841   if (RequiresDeviceCodegen) {
9842     const auto &E = *cast<OMPExecutableDirective>(S);
9843 
9844     llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9845         CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9846 
9847     // Is this a target region that should not be emitted as an entry point? If
9848     // so just signal we are done with this target region.
9849     if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9850       return;
9851 
9852     switch (E.getDirectiveKind()) {
9853     case OMPD_target:
9854       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9855                                                    cast<OMPTargetDirective>(E));
9856       break;
9857     case OMPD_target_parallel:
9858       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9859           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9860       break;
9861     case OMPD_target_teams:
9862       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9863           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9864       break;
9865     case OMPD_target_teams_distribute:
9866       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9867           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9868       break;
9869     case OMPD_target_teams_distribute_simd:
9870       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9871           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9872       break;
9873     case OMPD_target_parallel_for:
9874       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9875           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9876       break;
9877     case OMPD_target_parallel_for_simd:
9878       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9879           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9880       break;
9881     case OMPD_target_simd:
9882       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9883           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9884       break;
9885     case OMPD_target_teams_distribute_parallel_for:
9886       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9887           CGM, ParentName,
9888           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9889       break;
9890     case OMPD_target_teams_distribute_parallel_for_simd:
9891       CodeGenFunction::
9892           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9893               CGM, ParentName,
9894               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9895       break;
9896     case OMPD_target_teams_loop:
9897       CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9898           CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9899       break;
9900     case OMPD_target_parallel_loop:
9901       CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9902           CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9903       break;
9904     case OMPD_parallel:
9905     case OMPD_for:
9906     case OMPD_parallel_for:
9907     case OMPD_parallel_master:
9908     case OMPD_parallel_sections:
9909     case OMPD_for_simd:
9910     case OMPD_parallel_for_simd:
9911     case OMPD_cancel:
9912     case OMPD_cancellation_point:
9913     case OMPD_ordered:
9914     case OMPD_threadprivate:
9915     case OMPD_allocate:
9916     case OMPD_task:
9917     case OMPD_simd:
9918     case OMPD_tile:
9919     case OMPD_unroll:
9920     case OMPD_sections:
9921     case OMPD_section:
9922     case OMPD_single:
9923     case OMPD_master:
9924     case OMPD_critical:
9925     case OMPD_taskyield:
9926     case OMPD_barrier:
9927     case OMPD_taskwait:
9928     case OMPD_taskgroup:
9929     case OMPD_atomic:
9930     case OMPD_flush:
9931     case OMPD_depobj:
9932     case OMPD_scan:
9933     case OMPD_teams:
9934     case OMPD_target_data:
9935     case OMPD_target_exit_data:
9936     case OMPD_target_enter_data:
9937     case OMPD_distribute:
9938     case OMPD_distribute_simd:
9939     case OMPD_distribute_parallel_for:
9940     case OMPD_distribute_parallel_for_simd:
9941     case OMPD_teams_distribute:
9942     case OMPD_teams_distribute_simd:
9943     case OMPD_teams_distribute_parallel_for:
9944     case OMPD_teams_distribute_parallel_for_simd:
9945     case OMPD_target_update:
9946     case OMPD_declare_simd:
9947     case OMPD_declare_variant:
9948     case OMPD_begin_declare_variant:
9949     case OMPD_end_declare_variant:
9950     case OMPD_declare_target:
9951     case OMPD_end_declare_target:
9952     case OMPD_declare_reduction:
9953     case OMPD_declare_mapper:
9954     case OMPD_taskloop:
9955     case OMPD_taskloop_simd:
9956     case OMPD_master_taskloop:
9957     case OMPD_master_taskloop_simd:
9958     case OMPD_parallel_master_taskloop:
9959     case OMPD_parallel_master_taskloop_simd:
9960     case OMPD_requires:
9961     case OMPD_metadirective:
9962     case OMPD_unknown:
9963     default:
9964       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9965     }
9966     return;
9967   }
9968 
9969   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9970     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9971       return;
9972 
9973     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9974     return;
9975   }
9976 
9977   // If this is a lambda function, look into its body.
9978   if (const auto *L = dyn_cast<LambdaExpr>(S))
9979     S = L->getBody();
9980 
9981   // Keep looking for target regions recursively.
9982   for (const Stmt *II : S->children())
9983     scanForTargetRegionsFunctions(II, ParentName);
9984 }
9985 
9986 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9987   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9988       OMPDeclareTargetDeclAttr::getDeviceType(VD);
9989   if (!DevTy)
9990     return false;
9991   // Do not emit device_type(nohost) functions for the host.
9992   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9993     return true;
9994   // Do not emit device_type(host) functions for the device.
9995   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9996     return true;
9997   return false;
9998 }
9999 
10000 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10001   // If emitting code for the host, we do not process FD here. Instead we do
10002   // the normal code generation.
10003   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10004     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10005       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10006                                   CGM.getLangOpts().OpenMPIsTargetDevice))
10007         return true;
10008     return false;
10009   }
10010 
10011   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10012   // Try to detect target regions in the function.
10013   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10014     StringRef Name = CGM.getMangledName(GD);
10015     scanForTargetRegionsFunctions(FD->getBody(), Name);
10016     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10017                                 CGM.getLangOpts().OpenMPIsTargetDevice))
10018       return true;
10019   }
10020 
10021   // Do not to emit function if it is not marked as declare target.
10022   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10023          AlreadyEmittedTargetDecls.count(VD) == 0;
10024 }
10025 
10026 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10027   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10028                               CGM.getLangOpts().OpenMPIsTargetDevice))
10029     return true;
10030 
10031   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10032     return false;
10033 
10034   // Check if there are Ctors/Dtors in this declaration and look for target
10035   // regions in it. We use the complete variant to produce the kernel name
10036   // mangling.
10037   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10038   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10039     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10040       StringRef ParentName =
10041           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10042       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10043     }
10044     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10045       StringRef ParentName =
10046           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10047       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10048     }
10049   }
10050 
10051   // Do not to emit variable if it is not marked as declare target.
10052   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10053       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10054           cast<VarDecl>(GD.getDecl()));
10055   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10056       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10057         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10058        HasRequiresUnifiedSharedMemory)) {
10059     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10060     return true;
10061   }
10062   return false;
10063 }
10064 
// Registers \p VD with the OpenMPIRBuilder so an offloading entry can be
// produced for it. Only meaningful when offloading is configured (device
// compilation or a host compilation with target triples).
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Not an offloading compilation at all: nothing to register.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  // Address and linkage are computed lazily so the builder only forces them
  // when it actually needs to materialize the global.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  // The builder may synthesize reference variables (e.g. for 'link' clause
  // indirection); collect them so they can be kept alive below.
  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  // Keep the generated references from being optimized away.
  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}
10111 
10112 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10113   if (isa<FunctionDecl>(GD.getDecl()) ||
10114       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10115     return emitTargetFunctions(GD);
10116 
10117   return emitTargetGlobalVariable(GD);
10118 }
10119 
10120 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10121   for (const VarDecl *VD : DeferredGlobalVariables) {
10122     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10123         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10124     if (!Res)
10125       continue;
10126     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10127          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10128         !HasRequiresUnifiedSharedMemory) {
10129       CGM.EmitGlobal(VD);
10130     } else {
10131       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10132               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10133                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10134                HasRequiresUnifiedSharedMemory)) &&
10135              "Expected link clause or to clause with unified memory.");
10136       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10137     }
10138   }
10139 }
10140 
10141 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10142     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10143   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10144          " Expected target-based directive.");
10145 }
10146 
10147 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10148   for (const OMPClause *Clause : D->clauselists()) {
10149     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10150       HasRequiresUnifiedSharedMemory = true;
10151       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10152     } else if (const auto *AC =
10153                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10154       switch (AC->getAtomicDefaultMemOrderKind()) {
10155       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10156         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10157         break;
10158       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10159         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10160         break;
10161       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10162         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10163         break;
10164       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10165         break;
10166       }
10167     }
10168   }
10169 }
10170 
// Returns the atomic ordering recorded while processing a 'requires
// atomic_default_mem_order(...)' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10174 
10175 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10176                                                        LangAS &AS) {
10177   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10178     return false;
10179   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10180   switch(A->getAllocatorType()) {
10181   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10182   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10183   // Not supported, fallback to the default mem space.
10184   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10185   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10186   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10187   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10188   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10189   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10190   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10191     AS = LangAS::Default;
10192     return true;
10193   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10194     llvm_unreachable("Expected predefined allocator for the variables with the "
10195                      "static storage.");
10196   }
10197   return false;
10198 }
10199 
// Returns true when a 'requires unified_shared_memory' clause was seen
// (set in processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10203 
10204 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10205     CodeGenModule &CGM)
10206     : CGM(CGM) {
10207   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10208     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10209     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10210   }
10211 }
10212 
10213 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10214   if (CGM.getLangOpts().OpenMPIsTargetDevice)
10215     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10216 }
10217 
10218 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10219   if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10220     return true;
10221 
10222   const auto *D = cast<FunctionDecl>(GD.getDecl());
10223   // Do not to emit function if it is marked as declare target as it was already
10224   // emitted.
10225   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10226     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10227       if (auto *F = dyn_cast_or_null<llvm::Function>(
10228               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10229         return !F->isDeclaration();
10230       return false;
10231     }
10232     return true;
10233   }
10234 
10235   return !AlreadyEmittedTargetDecls.insert(D).second;
10236 }
10237 
10238 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10239                                     const OMPExecutableDirective &D,
10240                                     SourceLocation Loc,
10241                                     llvm::Function *OutlinedFn,
10242                                     ArrayRef<llvm::Value *> CapturedVars) {
10243   if (!CGF.HaveInsertPoint())
10244     return;
10245 
10246   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10247   CodeGenFunction::RunCleanupsScope Scope(CGF);
10248 
10249   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10250   llvm::Value *Args[] = {
10251       RTLoc,
10252       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10253       OutlinedFn};
10254   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10255   RealArgs.append(std::begin(Args), std::end(Args));
10256   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10257 
10258   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10259       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10260   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10261 }
10262 
10263 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10264                                          const Expr *NumTeams,
10265                                          const Expr *ThreadLimit,
10266                                          SourceLocation Loc) {
10267   if (!CGF.HaveInsertPoint())
10268     return;
10269 
10270   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10271 
10272   llvm::Value *NumTeamsVal =
10273       NumTeams
10274           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10275                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10276           : CGF.Builder.getInt32(0);
10277 
10278   llvm::Value *ThreadLimitVal =
10279       ThreadLimit
10280           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10281                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10282           : CGF.Builder.getInt32(0);
10283 
10284   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10285   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10286                                      ThreadLimitVal};
10287   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10288                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10289                       PushNumTeamsArgs);
10290 }
10291 
10292 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10293                                             const Expr *ThreadLimit,
10294                                             SourceLocation Loc) {
10295   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10296   llvm::Value *ThreadLimitVal =
10297       ThreadLimit
10298           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10299                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10300           : CGF.Builder.getInt32(0);
10301 
10302   // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10303   llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10304                                     ThreadLimitVal};
10305   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10306                           CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10307                       ThreadLimitArgs);
10308 }
10309 
// Emits a 'target data' region by delegating to
// OpenMPIRBuilder::createTargetData, wiring up callbacks that produce the map
// info, the region body, device-address captures, and user-defined mappers.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Invoked by the builder at the point where the offloading arrays must be
  // populated; returns the combined map-clause information.
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    // With debug info enabled, also record a name string per mapped
    // expression for runtime diagnostics.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Emits the region body. The builder may call this up to three times: the
  // privatizing pass runs only when device-address captures exist
  // (Priv/DupNoPriv); otherwise the body is emitted once without
  // privatization (NoPriv).
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Records the device address the builder produced for capture index I.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Resolves the user-defined mapper function for map entry I, if any.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(OMPBuilder.createTargetData(
          OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
  CGF.Builder.restoreIP(AfterIP);
}
10413 
// Emits a standalone target-data directive (enter data / exit data / update)
// as a call to the matching __tgt_target_data_* runtime entry point,
// optionally wrapped in an outer task (depend/nowait) and guarded by an 'if'
// clause.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below, read by ThenGen (which may run later
  // inside an outer task body).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function; listing them keeps the switch exhaustive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    // The nowait variants take four extra trailing arguments (left null here).
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them into InputInfo /
  // MapTypesArray / MapNamesArray, then runs ThenGen — inside an outer task
  // when depend/nowait clauses are present, inline otherwise.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause that evaluates false makes the directive a no-op (empty
  // else branch).
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10591 
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// The mangling letters below are those emitted by mangleVectorParameters().
enum ParamKindTy {
  Linear,     // linear parameter — mangled 'l'
  LinearRef,  // linear with ref() modifier — mangled 'R'
  LinearUVal, // linear with uval() modifier — mangled 'U'
  LinearVal,  // linear with val() modifier — mangled 'L'
  Uniform,    // uniform parameter — mangled 'u'
  Vector,     // default: vector parameter — mangled 'v'
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector; // Defaults to vector classification.
  // Linear step; with HasVarStride it is mangled as "s<StrideOrArg>"
  // (presumably an argument position per the AAVFABI — confirm).
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment; // Mangled as "a<Alignment>" when non-zero.
  bool HasVarStride = false; // True when the linear step is not a constant.
};
} // namespace
10610 
10611 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10612                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10613   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10614   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10615   // of that clause. The VLEN value must be power of 2.
10616   // In other case the notion of the function`s "characteristic data type" (CDT)
10617   // is used to compute the vector length.
10618   // CDT is defined in the following order:
10619   //   a) For non-void function, the CDT is the return type.
10620   //   b) If the function has any non-uniform, non-linear parameters, then the
10621   //   CDT is the type of the first such parameter.
10622   //   c) If the CDT determined by a) or b) above is struct, union, or class
10623   //   type which is pass-by-value (except for the type that maps to the
10624   //   built-in complex data type), the characteristic data type is int.
10625   //   d) If none of the above three cases is applicable, the CDT is int.
10626   // The VLEN is then determined based on the CDT and the size of vector
10627   // register of that ISA for which current vector version is generated. The
10628   // VLEN is computed using the formula below:
10629   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10630   // where vector register size specified in section 3.2.1 Registers and the
10631   // Stack Frame of original AMD64 ABI document.
10632   QualType RetType = FD->getReturnType();
10633   if (RetType.isNull())
10634     return 0;
10635   ASTContext &C = FD->getASTContext();
10636   QualType CDT;
10637   if (!RetType.isNull() && !RetType->isVoidType()) {
10638     CDT = RetType;
10639   } else {
10640     unsigned Offset = 0;
10641     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10642       if (ParamAttrs[Offset].Kind == Vector)
10643         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10644       ++Offset;
10645     }
10646     if (CDT.isNull()) {
10647       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10648         if (ParamAttrs[I + Offset].Kind == Vector) {
10649           CDT = FD->getParamDecl(I)->getType();
10650           break;
10651         }
10652       }
10653     }
10654   }
10655   if (CDT.isNull())
10656     CDT = C.IntTy;
10657   CDT = CDT->getCanonicalTypeUnqualified();
10658   if (CDT->isRecordType() || CDT->isUnionType())
10659     CDT = C.IntTy;
10660   return C.getTypeSize(CDT);
10661 }
10662 
10663 /// Mangle the parameter part of the vector function name according to
10664 /// their OpenMP classification. The mangling function is defined in
10665 /// section 4.5 of the AAVFABI(2021Q1).
10666 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10667   SmallString<256> Buffer;
10668   llvm::raw_svector_ostream Out(Buffer);
10669   for (const auto &ParamAttr : ParamAttrs) {
10670     switch (ParamAttr.Kind) {
10671     case Linear:
10672       Out << 'l';
10673       break;
10674     case LinearRef:
10675       Out << 'R';
10676       break;
10677     case LinearUVal:
10678       Out << 'U';
10679       break;
10680     case LinearVal:
10681       Out << 'L';
10682       break;
10683     case Uniform:
10684       Out << 'u';
10685       break;
10686     case Vector:
10687       Out << 'v';
10688       break;
10689     }
10690     if (ParamAttr.HasVarStride)
10691       Out << "s" << ParamAttr.StrideOrArg;
10692     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10693              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10694       // Don't print the step value if it is not present or if it is
10695       // equal to 1.
10696       if (ParamAttr.StrideOrArg < 0)
10697         Out << 'n' << -ParamAttr.StrideOrArg;
10698       else if (ParamAttr.StrideOrArg != 1)
10699         Out << ParamAttr.StrideOrArg;
10700     }
10701 
10702     if (!!ParamAttr.Alignment)
10703       Out << 'a' << ParamAttr.Alignment;
10704   }
10705 
10706   return std::string(Out.str());
10707 }
10708 
10709 static void
10710 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10711                            const llvm::APSInt &VLENVal,
10712                            ArrayRef<ParamAttrTy> ParamAttrs,
10713                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10714   struct ISADataTy {
10715     char ISA;
10716     unsigned VecRegSize;
10717   };
10718   ISADataTy ISAData[] = {
10719       {
10720           'b', 128
10721       }, // SSE
10722       {
10723           'c', 256
10724       }, // AVX
10725       {
10726           'd', 256
10727       }, // AVX2
10728       {
10729           'e', 512
10730       }, // AVX512
10731   };
10732   llvm::SmallVector<char, 2> Masked;
10733   switch (State) {
10734   case OMPDeclareSimdDeclAttr::BS_Undefined:
10735     Masked.push_back('N');
10736     Masked.push_back('M');
10737     break;
10738   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10739     Masked.push_back('N');
10740     break;
10741   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10742     Masked.push_back('M');
10743     break;
10744   }
10745   for (char Mask : Masked) {
10746     for (const ISADataTy &Data : ISAData) {
10747       SmallString<256> Buffer;
10748       llvm::raw_svector_ostream Out(Buffer);
10749       Out << "_ZGV" << Data.ISA << Mask;
10750       if (!VLENVal) {
10751         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10752         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10753         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10754       } else {
10755         Out << VLENVal;
10756       }
10757       Out << mangleVectorParameters(ParamAttrs);
10758       Out << '_' << Fn->getName();
10759       Fn->addFnAttr(Out.str());
10760     }
10761   }
10762 }
10763 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
10766 // defined in the "Vector Function ABI specifications for AArch64",
10767 // available at
10768 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10769 
10770 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10771 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10772   QT = QT.getCanonicalType();
10773 
10774   if (QT->isVoidType())
10775     return false;
10776 
10777   if (Kind == ParamKindTy::Uniform)
10778     return false;
10779 
10780   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10781     return false;
10782 
10783   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10784       !QT->isReferenceType())
10785     return false;
10786 
10787   return true;
10788 }
10789 
10790 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10791 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10792   QT = QT.getCanonicalType();
10793   unsigned Size = C.getTypeSize(QT);
10794 
10795   // Only scalars and complex within 16 bytes wide set PVB to true.
10796   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10797     return false;
10798 
10799   if (QT->isFloatingType())
10800     return true;
10801 
10802   if (QT->isIntegerType())
10803     return true;
10804 
10805   if (QT->isPointerType())
10806     return true;
10807 
10808   // TODO: Add support for complex types (section 3.1.2, item 2).
10809 
10810   return false;
10811 }
10812 
10813 /// Computes the lane size (LS) of a return type or of an input parameter,
10814 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10815 /// TODO: Add support for references, section 3.2.1, item 1.
10816 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10817   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10818     QualType PTy = QT.getCanonicalType()->getPointeeType();
10819     if (getAArch64PBV(PTy, C))
10820       return C.getTypeSize(PTy);
10821   }
10822   if (getAArch64PBV(QT, C))
10823     return C.getTypeSize(QT);
10824 
10825   return C.getTypeSize(C.getUIntPtrType());
10826 }
10827 
10828 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10829 // signature of the scalar function, as defined in 3.2.2 of the
10830 // AAVFABI.
10831 static std::tuple<unsigned, unsigned, bool>
10832 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10833   QualType RetType = FD->getReturnType().getCanonicalType();
10834 
10835   ASTContext &C = FD->getASTContext();
10836 
10837   bool OutputBecomesInput = false;
10838 
10839   llvm::SmallVector<unsigned, 8> Sizes;
10840   if (!RetType->isVoidType()) {
10841     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10842     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10843       OutputBecomesInput = true;
10844   }
10845   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10846     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10847     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10848   }
10849 
10850   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10851   // The LS of a function parameter / return value can only be a power
10852   // of 2, starting from 8 bits, up to 128.
10853   assert(llvm::all_of(Sizes,
10854                       [](unsigned Size) {
10855                         return Size == 8 || Size == 16 || Size == 32 ||
10856                                Size == 64 || Size == 128;
10857                       }) &&
10858          "Invalid size");
10859 
10860   return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
10861                          OutputBecomesInput);
10862 }
10863 
10864 // Function used to add the attribute. The parameter `VLEN` is
10865 // templated to allow the use of "x" when targeting scalable functions
10866 // for SVE.
10867 template <typename T>
10868 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10869                                  char ISA, StringRef ParSeq,
10870                                  StringRef MangledName, bool OutputBecomesInput,
10871                                  llvm::Function *Fn) {
10872   SmallString<256> Buffer;
10873   llvm::raw_svector_ostream Out(Buffer);
10874   Out << Prefix << ISA << LMask << VLEN;
10875   if (OutputBecomesInput)
10876     Out << "v";
10877   Out << ParSeq << "_" << MangledName;
10878   Fn->addFnAttr(Out.str());
10879 }
10880 
10881 // Helper function to generate the Advanced SIMD names depending on
10882 // the value of the NDS when simdlen is not present.
10883 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10884                                       StringRef Prefix, char ISA,
10885                                       StringRef ParSeq, StringRef MangledName,
10886                                       bool OutputBecomesInput,
10887                                       llvm::Function *Fn) {
10888   switch (NDS) {
10889   case 8:
10890     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10891                          OutputBecomesInput, Fn);
10892     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10893                          OutputBecomesInput, Fn);
10894     break;
10895   case 16:
10896     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10897                          OutputBecomesInput, Fn);
10898     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10899                          OutputBecomesInput, Fn);
10900     break;
10901   case 32:
10902     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10903                          OutputBecomesInput, Fn);
10904     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10905                          OutputBecomesInput, Fn);
10906     break;
10907   case 64:
10908   case 128:
10909     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10910                          OutputBecomesInput, Fn);
10911     break;
10912   default:
10913     llvm_unreachable("Scalar type is too wide.");
10914   }
10915 }
10916 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \param UserVLEN value of the `simdlen` clause, or 0 when absent.
/// \param ISA 's' for SVE, 'n' for Advanced SIMD (see callers).
/// \param VecRegSize vector register size in bits.
///        NOTE(review): currently unused in this function.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause: 'N' = unmasked, 'M' = masked.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length, mangled "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`; the VLENs are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11025 
11026 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11027                                               llvm::Function *Fn) {
11028   ASTContext &C = CGM.getContext();
11029   FD = FD->getMostRecentDecl();
11030   while (FD) {
11031     // Map params to their positions in function decl.
11032     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11033     if (isa<CXXMethodDecl>(FD))
11034       ParamPositions.try_emplace(FD, 0);
11035     unsigned ParamPos = ParamPositions.size();
11036     for (const ParmVarDecl *P : FD->parameters()) {
11037       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11038       ++ParamPos;
11039     }
11040     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11041       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11042       // Mark uniform parameters.
11043       for (const Expr *E : Attr->uniforms()) {
11044         E = E->IgnoreParenImpCasts();
11045         unsigned Pos;
11046         if (isa<CXXThisExpr>(E)) {
11047           Pos = ParamPositions[FD];
11048         } else {
11049           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11050                                 ->getCanonicalDecl();
11051           auto It = ParamPositions.find(PVD);
11052           assert(It != ParamPositions.end() && "Function parameter not found");
11053           Pos = It->second;
11054         }
11055         ParamAttrs[Pos].Kind = Uniform;
11056       }
11057       // Get alignment info.
11058       auto *NI = Attr->alignments_begin();
11059       for (const Expr *E : Attr->aligneds()) {
11060         E = E->IgnoreParenImpCasts();
11061         unsigned Pos;
11062         QualType ParmTy;
11063         if (isa<CXXThisExpr>(E)) {
11064           Pos = ParamPositions[FD];
11065           ParmTy = E->getType();
11066         } else {
11067           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11068                                 ->getCanonicalDecl();
11069           auto It = ParamPositions.find(PVD);
11070           assert(It != ParamPositions.end() && "Function parameter not found");
11071           Pos = It->second;
11072           ParmTy = PVD->getType();
11073         }
11074         ParamAttrs[Pos].Alignment =
11075             (*NI)
11076                 ? (*NI)->EvaluateKnownConstInt(C)
11077                 : llvm::APSInt::getUnsigned(
11078                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11079                           .getQuantity());
11080         ++NI;
11081       }
11082       // Mark linear parameters.
11083       auto *SI = Attr->steps_begin();
11084       auto *MI = Attr->modifiers_begin();
11085       for (const Expr *E : Attr->linears()) {
11086         E = E->IgnoreParenImpCasts();
11087         unsigned Pos;
11088         bool IsReferenceType = false;
11089         // Rescaling factor needed to compute the linear parameter
11090         // value in the mangled name.
11091         unsigned PtrRescalingFactor = 1;
11092         if (isa<CXXThisExpr>(E)) {
11093           Pos = ParamPositions[FD];
11094           auto *P = cast<PointerType>(E->getType());
11095           PtrRescalingFactor = CGM.getContext()
11096                                    .getTypeSizeInChars(P->getPointeeType())
11097                                    .getQuantity();
11098         } else {
11099           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11100                                 ->getCanonicalDecl();
11101           auto It = ParamPositions.find(PVD);
11102           assert(It != ParamPositions.end() && "Function parameter not found");
11103           Pos = It->second;
11104           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11105             PtrRescalingFactor = CGM.getContext()
11106                                      .getTypeSizeInChars(P->getPointeeType())
11107                                      .getQuantity();
11108           else if (PVD->getType()->isReferenceType()) {
11109             IsReferenceType = true;
11110             PtrRescalingFactor =
11111                 CGM.getContext()
11112                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11113                     .getQuantity();
11114           }
11115         }
11116         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11117         if (*MI == OMPC_LINEAR_ref)
11118           ParamAttr.Kind = LinearRef;
11119         else if (*MI == OMPC_LINEAR_uval)
11120           ParamAttr.Kind = LinearUVal;
11121         else if (IsReferenceType)
11122           ParamAttr.Kind = LinearVal;
11123         else
11124           ParamAttr.Kind = Linear;
11125         // Assuming a stride of 1, for `linear` without modifiers.
11126         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11127         if (*SI) {
11128           Expr::EvalResult Result;
11129           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11130             if (const auto *DRE =
11131                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11132               if (const auto *StridePVD =
11133                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11134                 ParamAttr.HasVarStride = true;
11135                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11136                 assert(It != ParamPositions.end() &&
11137                        "Function parameter not found");
11138                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11139               }
11140             }
11141           } else {
11142             ParamAttr.StrideOrArg = Result.Val.getInt();
11143           }
11144         }
11145         // If we are using a linear clause on a pointer, we need to
11146         // rescale the value of linear_step with the byte size of the
11147         // pointee type.
11148         if (!ParamAttr.HasVarStride &&
11149             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11150           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11151         ++SI;
11152         ++MI;
11153       }
11154       llvm::APSInt VLENVal;
11155       SourceLocation ExprLoc;
11156       const Expr *VLENExpr = Attr->getSimdlen();
11157       if (VLENExpr) {
11158         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11159         ExprLoc = VLENExpr->getExprLoc();
11160       }
11161       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11162       if (CGM.getTriple().isX86()) {
11163         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11164       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11165         unsigned VLEN = VLENVal.getExtValue();
11166         StringRef MangledName = Fn->getName();
11167         if (CGM.getTarget().hasFeature("sve"))
11168           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11169                                          MangledName, 's', 128, Fn, ExprLoc);
11170         else if (CGM.getTarget().hasFeature("neon"))
11171           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11172                                          MangledName, 'n', 128, Fn, ExprLoc);
11173       }
11174     }
11175     FD = FD->getPreviousDecl();
11176   }
11177 }
11178 
11179 namespace {
11180 /// Cleanup action for doacross support.
11181 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11182 public:
11183   static const int DoacrossFinArgs = 2;
11184 
11185 private:
11186   llvm::FunctionCallee RTLFn;
11187   llvm::Value *Args[DoacrossFinArgs];
11188 
11189 public:
11190   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11191                     ArrayRef<llvm::Value *> CallArgs)
11192       : RTLFn(RTLFn) {
11193     assert(CallArgs.size() == DoacrossFinArgs);
11194     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11195   }
11196   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11197     if (!CGF.HaveInsertPoint())
11198       return;
11199     CGF.EmitRuntimeCall(RTLFn, Args);
11200   }
11201 };
11202 } // namespace
11203 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the runtime's dimension record.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  // Zero-initialize the whole array; the `lo` field is left at 0 since
  // only `up` and `st` are stored explicitly below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration count is converted to kmp_int64 at its source location.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup that emits __kmpc_doacross_fini(loc, gtid) when the
  // scope is exited (normal or EH path).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
11274 
11275 template <typename T>
11276 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11277                                 const T *C, llvm::Value *ULoc,
11278                                 llvm::Value *ThreadID) {
11279   QualType Int64Ty =
11280       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11281   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11282   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11283       Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11284   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11285   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11286     const Expr *CounterVal = C->getLoopData(I);
11287     assert(CounterVal);
11288     llvm::Value *CntVal = CGF.EmitScalarConversion(
11289         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11290         CounterVal->getExprLoc());
11291     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11292                           /*Volatile=*/false, Int64Ty);
11293   }
11294   llvm::Value *Args[] = {
11295       ULoc, ThreadID,
11296       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11297   llvm::FunctionCallee RTLFn;
11298   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11299   OMPDoacrossKind<T> ODK;
11300   if (ODK.isSource(C)) {
11301     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11302                                                   OMPRTL___kmpc_doacross_post);
11303   } else {
11304     assert(ODK.isSink(C) && "Expect sink modifier.");
11305     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11306                                                   OMPRTL___kmpc_doacross_wait);
11307   }
11308   CGF.EmitRuntimeCall(RTLFn, Args);
11309 }
11310 
11311 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11312                                           const OMPDependClause *C) {
11313   return EmitDoacrossOrdered<OMPDependClause>(
11314       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11315       getThreadID(CGF, C->getBeginLoc()));
11316 }
11317 
11318 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11319                                           const OMPDoacrossClause *C) {
11320   return EmitDoacrossOrdered<OMPDoacrossClause>(
11321       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11322       getThreadID(CGF, C->getBeginLoc()));
11323 }
11324 
11325 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11326                                llvm::FunctionCallee Callee,
11327                                ArrayRef<llvm::Value *> Args) const {
11328   assert(Loc.isValid() && "Outlined function call location must be valid.");
11329   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11330 
11331   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11332     if (Fn->doesNotThrow()) {
11333       CGF.EmitNounwindRuntimeCall(Fn, Args);
11334       return;
11335     }
11336   }
11337   CGF.EmitRuntimeCall(Callee, Args);
11338 }
11339 
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Forwards directly to emitCall, which sets the debug location and picks
  // the nounwind call form when applicable.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11345 
11346 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11347   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11348     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11349       HasEmittedDeclareTargetRegion = true;
11350 }
11351 
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Base implementation: the native and target parameters coincide, so the
  // native parameter's local address is returned and TargetParam is unused.
  // NOTE(review): device-specific runtimes presumably override this to map
  // the two differently -- confirm against the subclasses.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11357 
11358 /// Return allocator value from expression, or return a null allocator (default
11359 /// when no allocator specified).
11360 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11361                                     const Expr *Allocator) {
11362   llvm::Value *AllocVal;
11363   if (Allocator) {
11364     AllocVal = CGF.EmitScalarExpr(Allocator);
11365     // According to the standard, the original allocator type is a enum
11366     // (integer). Convert to pointer type, if required.
11367     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11368                                         CGF.getContext().VoidPtrTy,
11369                                         Allocator->getExprLoc());
11370   } else {
11371     // If no allocator specified, it defaults to the null allocator.
11372     AllocVal = llvm::Constant::getNullValue(
11373         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11374   }
11375   return AllocVal;
11376 }
11377 
11378 /// Return the alignment from an allocate directive if present.
11379 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11380   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11381 
11382   if (!AllocateAlignment)
11383     return nullptr;
11384 
11385   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11386 }
11387 
// Returns the address to use for local variable \p VD, honoring both the
// `omp allocate` directive (allocation via __kmpc_alloc/__kmpc_aligned_alloc
// with a matching __kmpc_free cleanup) and the address-indirection required
// for locals of untied tasks. Returns Address::invalid() when the default
// alloca-based emission should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, the variable may have a
  // pre-registered (pointer, storage) address pair.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at run time; round it up to the alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    // __kmpc_aligned_alloc takes an extra alignment argument before the size;
    // otherwise fall back to __kmpc_alloc.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer into the task-local
    // slot so it survives task switches.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(tid, ptr, allocator) — same allocator expression that
        // was used for the allocation.
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the real storage address registered for the untied task;
    // otherwise wrap the freshly allocated pointer.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11487 
11488 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11489                                              const VarDecl *VD) const {
11490   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11491   if (It == FunctionToUntiedTaskStackMap.end())
11492     return false;
11493   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11494 }
11495 
11496 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11497     CodeGenModule &CGM, const OMPLoopDirective &S)
11498     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11499   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11500   if (!NeedToPush)
11501     return;
11502   NontemporalDeclsSet &DS =
11503       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11504   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11505     for (const Stmt *Ref : C->private_refs()) {
11506       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11507       const ValueDecl *VD;
11508       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11509         VD = DRE->getDecl();
11510       } else {
11511         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11512         assert((ME->isImplicitCXXThis() ||
11513                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11514                "Expected member of current class.");
11515         VD = ME->getMemberDecl();
11516       }
11517       DS.insert(VD);
11518     }
11519   }
11520 }
11521 
11522 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11523   if (!NeedToPush)
11524     return;
11525   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11526 }
11527 
11528 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11529     CodeGenFunction &CGF,
11530     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11531                           std::pair<Address, Address>> &LocalVars)
11532     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11533   if (!NeedToPush)
11534     return;
11535   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11536       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11537   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11538 }
11539 
11540 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11541   if (!NeedToPush)
11542     return;
11543   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11544 }
11545 
11546 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11547   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11548 
11549   return llvm::any_of(
11550       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11551       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11552 }
11553 
11554 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11555     const OMPExecutableDirective &S,
11556     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11557     const {
11558   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11559   // Vars in target/task regions must be excluded completely.
11560   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11561       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11562     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11563     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11564     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11565     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11566       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11567         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11568     }
11569   }
11570   // Exclude vars in private clauses.
11571   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11572     for (const Expr *Ref : C->varlist()) {
11573       if (!Ref->getType()->isScalarType())
11574         continue;
11575       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11576       if (!DRE)
11577         continue;
11578       NeedToCheckForLPCs.insert(DRE->getDecl());
11579     }
11580   }
11581   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11582     for (const Expr *Ref : C->varlist()) {
11583       if (!Ref->getType()->isScalarType())
11584         continue;
11585       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11586       if (!DRE)
11587         continue;
11588       NeedToCheckForLPCs.insert(DRE->getDecl());
11589     }
11590   }
11591   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11592     for (const Expr *Ref : C->varlist()) {
11593       if (!Ref->getType()->isScalarType())
11594         continue;
11595       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11596       if (!DRE)
11597         continue;
11598       NeedToCheckForLPCs.insert(DRE->getDecl());
11599     }
11600   }
11601   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11602     for (const Expr *Ref : C->varlist()) {
11603       if (!Ref->getType()->isScalarType())
11604         continue;
11605       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11606       if (!DRE)
11607         continue;
11608       NeedToCheckForLPCs.insert(DRE->getDecl());
11609     }
11610   }
11611   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11612     for (const Expr *Ref : C->varlist()) {
11613       if (!Ref->getType()->isScalarType())
11614         continue;
11615       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11616       if (!DRE)
11617         continue;
11618       NeedToCheckForLPCs.insert(DRE->getDecl());
11619     }
11620   }
11621   for (const Decl *VD : NeedToCheckForLPCs) {
11622     for (const LastprivateConditionalData &Data :
11623          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11624       if (Data.DeclToUniqueName.count(VD) > 0) {
11625         if (!Data.Disabled)
11626           NeedToAddForLPCsAsDisabled.insert(VD);
11627         break;
11628       }
11629     }
11630   }
11631 }
11632 
// Push-constructor: if the directive has at least one
// `lastprivate(conditional: ...)` clause (OpenMP >= 5.0), registers every
// variable from such clauses — with a unique global name — on the
// LastprivateConditionalStack, together with the loop IV lvalue and the
// current function. Otherwise does nothing (Action == DoNotPush).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only conditional lastprivates are tracked here.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlist()) {
      // Map each decl to its unique "pl_cond..." global name used later for
      // the last-value/last-iv globals.
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11664 
11665 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11666     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11667     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11668   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11669   if (CGM.getLangOpts().OpenMP < 50)
11670     return;
11671   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11672   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11673   if (!NeedToAddForLPCsAsDisabled.empty()) {
11674     Action = ActionToDo::DisableLastprivateConditional;
11675     LastprivateConditionalData &Data =
11676         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11677     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11678       Data.DeclToUniqueName.try_emplace(VD);
11679     Data.Fn = CGF.CurFn;
11680     Data.Disabled = true;
11681   }
11682 }
11683 
// Named factory wrapping the disable-constructor: suppresses the
// lastprivate conditional analysis inside the region of \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11689 
11690 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11691   if (CGM.getLangOpts().OpenMP < 50)
11692     return;
11693   if (Action == ActionToDo::DisableLastprivateConditional) {
11694     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11695            "Expected list of disabled private vars.");
11696     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11697   }
11698   if (Action == ActionToDo::PushAsLastprivateConditional) {
11699     assert(
11700         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11701         "Expected list of lastprivate conditional vars.");
11702     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11703   }
11704 }
11705 
// Creates (or reuses) the per-function { value, Fired } wrapper record for a
// lastprivate conditional variable, resets its Fired flag to 0, and returns
// the address of the value field to be used as the variable's private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build struct { T value; char Fired; }.
    // NOTE(review): the record name literal has a typo ("lasprivate"); left
    // unchanged since emitted IR names/tests may match it — confirm before
    // renaming.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered: unpack the cached (type, fields, base) tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0 — the variable has not been updated yet in this region.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}
11738 
11739 namespace {
11740 /// Checks if the lastprivate conditional variable is referenced in LHS.
11741 class LastprivateConditionalRefChecker final
11742     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11743   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11744   const Expr *FoundE = nullptr;
11745   const Decl *FoundD = nullptr;
11746   StringRef UniqueDeclName;
11747   LValue IVLVal;
11748   llvm::Function *FoundFn = nullptr;
11749   SourceLocation Loc;
11750 
11751 public:
11752   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11753     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11754          llvm::reverse(LPM)) {
11755       auto It = D.DeclToUniqueName.find(E->getDecl());
11756       if (It == D.DeclToUniqueName.end())
11757         continue;
11758       if (D.Disabled)
11759         return false;
11760       FoundE = E;
11761       FoundD = E->getDecl()->getCanonicalDecl();
11762       UniqueDeclName = It->second;
11763       IVLVal = D.IVLVal;
11764       FoundFn = D.Fn;
11765       break;
11766     }
11767     return FoundE == E;
11768   }
11769   bool VisitMemberExpr(const MemberExpr *E) {
11770     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11771       return false;
11772     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11773          llvm::reverse(LPM)) {
11774       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11775       if (It == D.DeclToUniqueName.end())
11776         continue;
11777       if (D.Disabled)
11778         return false;
11779       FoundE = E;
11780       FoundD = E->getMemberDecl()->getCanonicalDecl();
11781       UniqueDeclName = It->second;
11782       IVLVal = D.IVLVal;
11783       FoundFn = D.Fn;
11784       break;
11785     }
11786     return FoundE == E;
11787   }
11788   bool VisitStmt(const Stmt *S) {
11789     for (const Stmt *Child : S->children()) {
11790       if (!Child)
11791         continue;
11792       if (const auto *E = dyn_cast<Expr>(Child))
11793         if (!E->isGLValue())
11794           continue;
11795       if (Visit(Child))
11796         return true;
11797     }
11798     return false;
11799   }
11800   explicit LastprivateConditionalRefChecker(
11801       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11802       : LPM(LPM) {}
11803   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11804   getFoundData() const {
11805     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11806   }
11807 };
11808 } // namespace
11809 
// Emits the conditional update for a lastprivate conditional variable:
// maintains two internal globals per unique name — the last loop-iteration
// value ("<name>iv") and the last stored value ("<name>") — and, guarded by
// a critical region (unless compiling with -fopenmp-simd), copies the
// private value into the global when the current iteration is >= the last
// recorded one.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11897 
// Checks whether \p LHS (the target of a store) refers to a tracked
// lastprivate conditional variable and, if so, emits the update. When the
// reference is found in an outer function (inner parallel region), only the
// wrapper struct's Fired flag is set atomically; the actual value copy is
// performed later by checkAndEmitSharedLastprivateConditional.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct to reach
    // the Fired field.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: other threads of the inner region may set the flag too.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11941 
11942 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11943     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11944     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11945   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11946     return;
11947   auto Range = llvm::reverse(LastprivateConditionalStack);
11948   auto It = llvm::find_if(
11949       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11950   if (It == Range.end() || It->Fn != CGF.CurFn)
11951     return;
11952   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11953   assert(LPCI != LastprivateConditionalToTypes.end() &&
11954          "Lastprivates must be registered already.");
11955   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11956   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11957   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11958   for (const auto &Pair : It->DeclToUniqueName) {
11959     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11960     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11961       continue;
11962     auto I = LPCI->getSecond().find(Pair.first);
11963     assert(I != LPCI->getSecond().end() &&
11964            "Lastprivate must be rehistered already.");
11965     // bool Cmp = priv_a.Fired != 0;
11966     LValue BaseLVal = std::get<3>(I->getSecond());
11967     LValue FiredLVal =
11968         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11969     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11970     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11971     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11972     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11973     // if (Cmp) {
11974     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11975     CGF.EmitBlock(ThenBB);
11976     Address Addr = CGF.GetAddrOfLocalVar(VD);
11977     LValue LVal;
11978     if (VD->getType()->isReferenceType())
11979       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11980                                            AlignmentSource::Decl);
11981     else
11982       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11983                                 AlignmentSource::Decl);
11984     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11985                                      D.getBeginLoc());
11986     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11987     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11988     // }
11989   }
11990 }
11991 
11992 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11993     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11994     SourceLocation Loc) {
11995   if (CGF.getLangOpts().OpenMP < 50)
11996     return;
11997   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11998   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11999          "Unknown lastprivate conditional variable.");
12000   StringRef UniqueName = It->second;
12001   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12002   // The variable was not updated in the region - exit.
12003   if (!GV)
12004     return;
12005   LValue LPLVal = CGF.MakeRawAddrLValue(
12006       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12007   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12008   CGF.EmitStoreOfScalar(Res, PrivLVal);
12009 }
12010 
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  // Parallel outlining needs the full OpenMP runtime, absent in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12017 
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  // Teams outlining needs the full OpenMP runtime, absent in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12024 
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Task outlining needs the full OpenMP runtime, absent in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12032 
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  // Parallel regions are not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12041 
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  // Critical regions are not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12048 
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  // Master regions are not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12054 
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  // Masked regions are not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12061 
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  // taskyield is not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12066 
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  // Taskgroups are not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12072 
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  // Single regions are not lowered in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12080 
// 'ordered' regions require runtime synchronization; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12087 
// Barriers synchronize threads via the runtime; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12095 
// Dynamic-dispatch loop scheduling is a runtime service; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12102 
// Counterpart to emitForDispatchInit; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12107 
// Static worksharing-loop initialization is a runtime service; never reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12113 
// 'distribute' scheduling requires the teams runtime; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12119 
// End-of-ordered-iteration signaling is a runtime service; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12126 
// Counterpart to emitForStaticInit; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12132 
// Fetching the next dynamically-scheduled chunk is a runtime service; never
// reached in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12140 
// 'num_threads' only makes sense with a threading runtime; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12146 
// 'proc_bind' controls thread affinity; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12152 
// Threadprivate storage requires per-thread runtime bookkeeping; never
// reached in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12159 
// Threadprivate variable definitions require runtime registration; never
// reached in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12165 
// Compiler-synthesized threadprivate storage; never reached in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12170 
// '#pragma omp flush' is an inter-thread memory-consistency construct; never
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12177 
// '#pragma omp task' requires the tasking runtime; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12186 
// '#pragma omp taskloop' requires the tasking runtime; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12193 
// Reductions in SIMD-only mode: only the "simple" form (no cross-thread
// combining) is expected here, as asserted below; the base-class
// implementation is reused to emit it.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Delegate to CGOpenMPRuntime, which handles the simple-reduction case.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12202 
// Task reductions require the tasking runtime; never reached in SIMD-only
// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12208 
// Counterpart to emitTaskReductionInit; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12214 
// Task-reduction bookkeeping fixups; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12221 
// Looks up a task-reduction item via the runtime; never reached in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12228 
// 'taskwait' requires the tasking runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12234 
// 'cancellation point' requires runtime cancellation support; never reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12240 
// 'cancel' requires runtime cancellation support; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12246 
// Target-region outlining is part of device offloading; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12253 
// Launching a target region is part of device offloading; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12263 
// Device-side function emission is part of offloading; never reached in
// SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12267 
// Device-side global-variable emission is part of offloading; never reached
// in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12271 
// In SIMD-only mode no global needs target-specific handling: always report
// "not handled" (false) — unlike the stubs above, this is a valid, reachable
// query rather than an unreachable operation.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12275 
// '#pragma omp teams' requires the teams runtime; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12283 
// 'num_teams'/'thread_limit' clauses only apply with the teams runtime; never
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12290 
// 'target data' mapping requires the offloading runtime; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12297 
// Stand-alone data-mapping directives (e.g. enter/exit data, update) require
// the offloading runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12303 
// Doacross loop initialization requires runtime support; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12309 
// Doacross ordering via 'depend' clauses requires runtime support; never
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12314 
// Doacross ordering via 'doacross' clauses requires runtime support; never
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12319 
// Parameter translation is only used for outlined target regions; never
// reached in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12325 
// Counterpart to translateParameter for target-region parameters; never
// reached in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12332