xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "ABIInfoImpl.h"
15 #include "CGCXXABI.h"
16 #include "CGCleanup.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/APValue.h"
21 #include "clang/AST/Attr.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/AST/OpenMPClause.h"
24 #include "clang/AST/StmtOpenMP.h"
25 #include "clang/AST/StmtVisitor.h"
26 #include "clang/Basic/BitmaskEnum.h"
27 #include "clang/Basic/FileManager.h"
28 #include "clang/Basic/OpenMPKinds.h"
29 #include "clang/Basic/SourceManager.h"
30 #include "clang/CodeGen/ConstantInitBuilder.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/SetOperations.h"
33 #include "llvm/ADT/SmallBitVector.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/StringExtras.h"
36 #include "llvm/Bitcode/BitcodeReader.h"
37 #include "llvm/IR/Constants.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/GlobalValue.h"
40 #include "llvm/IR/InstrTypes.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/Support/AtomicOrdering.h"
43 #include "llvm/Support/Format.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <numeric>
48 #include <optional>
49 
50 using namespace clang;
51 using namespace CodeGen;
52 using namespace llvm::omp;
53 
54 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a captured statement.
  /// \param CS Captured statement providing the captures of the region.
  /// \param RegionKind Kind of region (outlined/task/inlined/target).
  /// \param CodeGen Callback that emits the body of the region.
  /// \param Kind OpenMP directive this region was created for.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement; used for inlined
  /// regions that reuse the captures of an enclosing region.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for 'untied' tasks; no-op by default,
  /// overridden by task regions (and forwarded by inlined regions).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any CR_OpenMP captured-statement info is (a subclass
  /// of) CGOpenMPRegionInfo; subclasses further discriminate on
  /// getRegionKind().
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
116 
117 /// API for captured statement code generation in OpenMP constructs.
118 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
119 public:
120   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
121                              const RegionCodeGenTy &CodeGen,
122                              OpenMPDirectiveKind Kind, bool HasCancel,
123                              StringRef HelperName)
124       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
125                            HasCancel),
126         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
127     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
128   }
129 
130   /// Get a variable or parameter for storing global thread id
131   /// inside OpenMP construct.
132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133 
134   /// Get the name of the capture helper.
135   StringRef getHelperName() const override { return HelperName; }
136 
137   static bool classof(const CGCapturedStmtInfo *Info) {
138     return CGOpenMPRegionInfo::classof(Info) &&
139            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
140                ParallelOutlinedRegion;
141   }
142 
143 private:
144   /// A variable or parameter storing global thread id for OpenMP
145   /// constructs.
146   const VarDecl *ThreadIDVar;
147   StringRef HelperName;
148 };
149 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the part-id switch used for 'untied'
  /// tasks: a switch over the stored part id is built on entry, and every
  /// task-switching point records the next part id, returns from the task
  /// function, and adds a resume block as a new case of the switch.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: constructed from the negation of Tied).
    bool Untied;
    /// Variable (pointer) holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra codegen run at each task-switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// The dispatch switch; created lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch default returns from the task; case 0 resumes at the very
        // start of the task body.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point. No-op for tied tasks.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the id of the next part; getNumCases() is the next free id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        // Leave the task function now; the new switch case re-enters at the
        // resume block emitted next.
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied task switching to the associated action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
238 
239 /// API for inlined captured statement code generation in OpenMP
240 /// constructs.
241 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
242 public:
243   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
244                             const RegionCodeGenTy &CodeGen,
245                             OpenMPDirectiveKind Kind, bool HasCancel)
246       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
247         OldCSI(OldCSI),
248         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
249 
250   // Retrieve the value of the context parameter.
251   llvm::Value *getContextValue() const override {
252     if (OuterRegionInfo)
253       return OuterRegionInfo->getContextValue();
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   void setContextValue(llvm::Value *V) override {
258     if (OuterRegionInfo) {
259       OuterRegionInfo->setContextValue(V);
260       return;
261     }
262     llvm_unreachable("No context value for inlined OpenMP region");
263   }
264 
265   /// Lookup the captured field decl for a variable.
266   const FieldDecl *lookup(const VarDecl *VD) const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->lookup(VD);
269     // If there is no outer outlined region,no need to lookup in a list of
270     // captured variables, we can use the original one.
271     return nullptr;
272   }
273 
274   FieldDecl *getThisFieldDecl() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThisFieldDecl();
277     return nullptr;
278   }
279 
280   /// Get a variable or parameter for storing global thread id
281   /// inside OpenMP construct.
282   const VarDecl *getThreadIDVariable() const override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariable();
285     return nullptr;
286   }
287 
288   /// Get an LValue for the current ThreadID variable.
289   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
290     if (OuterRegionInfo)
291       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
292     llvm_unreachable("No LValue for inlined OpenMP construct");
293   }
294 
295   /// Get the name of the capture helper.
296   StringRef getHelperName() const override {
297     if (auto *OuterRegionInfo = getOldCSI())
298       return OuterRegionInfo->getHelperName();
299     llvm_unreachable("No helper name for inlined OpenMP construct");
300   }
301 
302   void emitUntiedSwitch(CodeGenFunction &CGF) override {
303     if (OuterRegionInfo)
304       OuterRegionInfo->emitUntiedSwitch(CGF);
305   }
306 
307   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
308 
309   static bool classof(const CGCapturedStmtInfo *Info) {
310     return CGOpenMPRegionInfo::classof(Info) &&
311            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
312   }
313 
314   ~CGOpenMPInlinedRegionInfo() override = default;
315 
316 private:
317   /// CodeGen info about outer OpenMP region.
318   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
319   CGOpenMPRegionInfo *OuterRegionInfo;
320 };
321 
322 /// API for captured statement code generation in OpenMP target
323 /// constructs. For this captures, implicit parameters are used instead of the
324 /// captured fields. The name of the target region has to be unique in a given
325 /// application so it is provided by the client, because only the client has
326 /// the information to generate that.
327 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
328 public:
329   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
330                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
331       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
332                            /*HasCancel=*/false),
333         HelperName(HelperName) {}
334 
335   /// This is unused for target regions because each starts executing
336   /// with a single thread.
337   const VarDecl *getThreadIDVariable() const override { return nullptr; }
338 
339   /// Get the name of the capture helper.
340   StringRef getHelperName() const override { return HelperName; }
341 
342   static bool classof(const CGCapturedStmtInfo *Info) {
343     return CGOpenMPRegionInfo::classof(Info) &&
344            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
345   }
346 
347 private:
348   StringRef HelperName;
349 };
350 
/// Placeholder region-codegen callback for regions that wrap only
/// expressions (see CGOpenMPInnerExprInfo); it must never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// Privatizes every captured global in \p CS so that references emitted
  /// for the expression resolve through the privatization scope.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters need no privatization; only globals do.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Evaluate a reference to the captured variable and map the variable
      // to the resulting address in the private scope.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never matches in isa/dyn_cast; this info is only installed transiently.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
412 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as the current CapturedStmtInfo for the
/// lifetime of the object and restores the previous one on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved copies of the CGF state cleared when NoInheritance is set; used to
  // restore it in the destructor.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, temporarily clear the lambda/block capture
  /// state of \p CGF so the inlined region does not inherit it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the capture state; members start empty/null, so the swap and
      // assignments leave CGF with a clean slate.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
455 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// These values must stay in sync with the runtime's KMP_IDENT_* flags.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
484 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
//                             fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerator order below is used as GEP field indices and therefore
/// must match the field order of the runtime's ident_t exactly.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
525 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values are part of the
/// contract with the OpenMP runtime and must not be changed.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
557 
558 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
559 /// region.
560 class CleanupTy final : public EHScopeStack::Cleanup {
561   PrePostActionTy *Action;
562 
563 public:
564   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
565   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
566     if (!CGF.HaveInsertPoint())
567       return;
568     Action->Exit(CGF);
569   }
570 };
571 
572 } // anonymous namespace
573 
574 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
575   CodeGenFunction::RunCleanupsScope Scope(CGF);
576   if (PrePostAction) {
577     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
578     Callback(CodeGen, CGF, *PrePostAction);
579   } else {
580     PrePostActionTy Action;
581     Callback(CodeGen, CGF, Action);
582   }
583 }
584 
585 /// Check if the combiner is a call to UDR combiner and if it is so return the
586 /// UDR decl used for reduction.
587 static const OMPDeclareReductionDecl *
588 getReductionInit(const Expr *ReductionOp) {
589   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
590     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
591       if (const auto *DRE =
592               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
593         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
594           return DRD;
595   return nullptr;
596 }
597 
/// Initialize \p Private either with the user-defined reduction initializer
/// of \p DRD (binding the initializer's LHS/RHS placeholders to \p Private
/// and \p Original) or, when the UDR has no initializer, with a
/// default-constructed (null) value of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR initializer is modeled as a call through an opaque value;
    // bind its LHS/RHS placeholder variables to the private/original
    // addresses, then emit the call with the initializer function mapped in.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    // Reduction.second is the initializer function (see
    // getUserDefinedReduction); map it as the callee of InitOp.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a null constant of the element type
    // in a private global and copy it into the private location.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are stored straight through the opaque lvalue; no RValue
      // round trip is needed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
651 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, \p Init is a UDR reduction op and
/// each element is initialized via emitInitWithReductionInitializer;
/// otherwise \p Init is emitted as a plain initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. When non-null,
/// \p SrcAddr provides the original elements walked in lockstep with the
/// destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" although this advances the
    // *source* pointer; purely cosmetic, but confirm no FileCheck tests match
    // it before renaming.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
740 
/// Emit the LValue for the shared (original) copy of reduction item \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
744 
745 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
746                                             const Expr *E) {
747   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
748     return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
749   return LValue();
750 }
751 
/// Emits element-wise initialization of an array-typed private reduction
/// copy, choosing between the declare-reduction initializer and the private
/// variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the user-defined (declare reduction) initializer when one exists, or
  // when the private copy has no initializer of its own; otherwise fall back
  // to the private variable's init expression.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
768 
769 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
770                                    ArrayRef<const Expr *> Origs,
771                                    ArrayRef<const Expr *> Privates,
772                                    ArrayRef<const Expr *> ReductionOps) {
773   ClausesData.reserve(Shareds.size());
774   SharedAddresses.reserve(Shareds.size());
775   Sizes.reserve(Shareds.size());
776   BaseDecls.reserve(Shareds.size());
777   const auto *IOrig = Origs.begin();
778   const auto *IPriv = Privates.begin();
779   const auto *IRed = ReductionOps.begin();
780   for (const Expr *Ref : Shareds) {
781     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
782     std::advance(IOrig, 1);
783     std::advance(IPriv, 1);
784     std::advance(IRed, 1);
785   }
786 }
787 
788 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
789   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
790          "Number of generated lvalues must be exactly N.");
791   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
792   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
793   SharedAddresses.emplace_back(First, Second);
794   if (ClausesData[N].Shared == ClausesData[N].Ref) {
795     OrigAddresses.emplace_back(First, Second);
796   } else {
797     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
798     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
799     OrigAddresses.emplace_back(First, Second);
800   }
801 }
802 
/// Computes the size of the N-th reduction item and records the pair
/// (size in chars, element count) in Sizes. For variably modified private
/// types the VLA size expression is bound to the computed element count.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically sized item: only the byte size is needed, no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, computed from the section's bound
    // pointers; byte size follows by multiplying with sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Not a section: derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
836 
/// Re-emits the variably modified private type of the N-th item using a
/// caller-supplied element count (Size). For constant-sized types Size must
/// be null and nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while the type is emitted so the
  // dimensions are materialized from the supplied count.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
853 
/// Initializes the N-th private reduction copy: aggregate initialization for
/// array types, the declare-reduction initializer when one applies, or the
/// private variable's own initializer otherwise. DefaultInit is invoked with
/// side effects in several branches; its boolean result signals whether it
/// already performed the initialization.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: run DefaultInit first when a user-defined initializer will
    // be used, then initialize element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a declare-reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit did not handle it: emit the variable's own non-trivial
    // initializer into the private storage.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
878 
879 bool ReductionCodeGen::needCleanups(unsigned N) {
880   QualType PrivateType = getPrivateType(N);
881   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
882   return DTorKind != QualType::DK_none;
883 }
884 
885 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
886                                     Address PrivateAddr) {
887   QualType PrivateType = getPrivateType(N);
888   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
889   if (needCleanups(N)) {
890     PrivateAddr =
891         PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
892     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
893   }
894 }
895 
/// Dereferences pointer/reference levels in BaseLV until the underlying type
/// matches ElTy, then returns that lvalue with its element type adjusted to
/// ElTy's memory representation (preserving base and TBAA info).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of indirection per iteration.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
914 
/// Makes Addr usable where an expression of type BaseTy is expected: for
/// each pointer/reference level between BaseTy and ElTy a memory temporary
/// is created and chained (outer temp stores the inner temp's pointer), and
/// Addr is stored into the innermost one; the outermost temp is returned.
/// Without any indirection, Addr is simply cast to the original base
/// address type.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each temp points to the next.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and hand back
    // the outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection: reuse the original base address with the new pointer.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
943 
944 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
945   const VarDecl *OrigVD = nullptr;
946   if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
947     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
948     while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
949       Base = TempOASE->getBase()->IgnoreParenImpCasts();
950     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
951       Base = TempASE->getBase()->IgnoreParenImpCasts();
952     DE = cast<DeclRefExpr>(Base);
953     OrigVD = cast<VarDecl>(DE->getDecl());
954   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
955     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
956     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
957       Base = TempASE->getBase()->IgnoreParenImpCasts();
958     DE = cast<DeclRefExpr>(Base);
959     OrigVD = cast<VarDecl>(DE->getDecl());
960   }
961   return OrigVD;
962 }
963 
/// Adjusts the private copy's address for array-section/subscript reduction
/// items: the element offset between the base declaration and the shared
/// item's start is computed and replayed onto the private pointer, so the
/// private address corresponds to the base declaration. Records the base
/// declaration in BaseDecls either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Distance (in elements) from the shared item's start to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    // Apply the same offset to the private pointer.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  // Plain variable reference: no adjustment necessary.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
990 
991 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
992   const OMPDeclareReductionDecl *DRD =
993       getReductionInit(ClausesData[N].ReductionOp);
994   return DRD && DRD->getInitializer();
995 }
996 
997 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
998   return CGF.EmitLoadOfPointerLValue(
999       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1000       getThreadIDVariable()->getType()->castAs<PointerType>());
1001 }
1002 
/// Emits the body of an OpenMP region as a structured block, guarded by a
/// terminate scope so exceptions cannot escape the region boundary.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  // The stored CodeGen callback emits the actual region body.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1017 
1018 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1019     CodeGenFunction &CGF) {
1020   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1021                             getThreadIDVariable()->getType(),
1022                             AlignmentSource::Decl);
1023 }
1024 
1025 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1026                                        QualType FieldTy) {
1027   auto *Field = FieldDecl::Create(
1028       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1029       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1030       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1031   Field->setAccess(AS_public);
1032   DC->addDecl(Field);
1033   return Field;
1034 }
1035 
/// Sets up the OpenMPIRBuilder (initialize, load offload metadata on the
/// device side, then apply the config) and honors -fopenmp-force-usm.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // kmp_critical_name: an array of 8 i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  // When compiling for the device, seed the builder with offload metadata
  // from the host IR file; otherwise pass an empty path.
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1057 
1058 void CGOpenMPRuntime::clear() {
1059   InternalVars.clear();
1060   // Clean non-target variable declarations possibly used only in debug info.
1061   for (const auto &Data : EmittedNonTargetVariables) {
1062     if (!Data.getValue().pointsToAliveValue())
1063       continue;
1064     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1065     if (!GV)
1066       continue;
1067     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1068       continue;
1069     GV->eraseFromParent();
1070   }
1071 }
1072 
1073 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1074   return OMPBuilder.createPlatformSpecificName(Parts);
1075 }
1076 
/// Emits the outlined combiner or initializer helper of a 'declare
/// reduction' construct. The generated internal function takes two restrict
/// pointers (out, in), privatizes the In/Out variables to the pointees, and
/// emits either CombinerInitializer as an ignored expression or, for
/// initializers without one, the Out variable's own init.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny: force inlining in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // Initializer without an explicit expression: emit the Out variable's own
  // non-trivial initializer into the output storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1131 
1132 void CGOpenMPRuntime::emitUserDefinedReduction(
1133     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1134   if (UDRMap.count(D) > 0)
1135     return;
1136   llvm::Function *Combiner = emitCombinerOrInitializer(
1137       CGM, D->getType(), D->getCombiner(),
1138       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1139       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1140       /*IsCombiner=*/true);
1141   llvm::Function *Initializer = nullptr;
1142   if (const Expr *Init = D->getInitializer()) {
1143     Initializer = emitCombinerOrInitializer(
1144         CGM, D->getType(),
1145         D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
1146                                                                      : nullptr,
1147         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1148         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1149         /*IsCombiner=*/false);
1150   }
1151   UDRMap.try_emplace(D, Combiner, Initializer);
1152   if (CGF) {
1153     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1154     Decls.second.push_back(D);
1155   }
1156 }
1157 
1158 std::pair<llvm::Function *, llvm::Function *>
1159 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1160   auto I = UDRMap.find(D);
1161   if (I != UDRMap.end())
1162     return I->second;
1163   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1164   return UDRMap.lookup(D);
1165 }
1166 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the given directive kind onto the
  // builder's finalization stack. A null builder makes this RAII a no-op.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      // Branch to the parallel cancel destination, running cleanups on the
      // way out of the region.
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the callback pushed in the constructor (no-op without a builder).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1211 
1212 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1213     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1214     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1215     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1216   assert(ThreadIDVar->getType()->isPointerType() &&
1217          "thread id variable must be of type kmp_int32 *");
1218   CodeGenFunction CGF(CGM, true);
1219   bool HasCancel = false;
1220   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1221     HasCancel = OPD->hasCancel();
1222   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1223     HasCancel = OPD->hasCancel();
1224   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1225     HasCancel = OPSD->hasCancel();
1226   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1227     HasCancel = OPFD->hasCancel();
1228   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1229     HasCancel = OPFD->hasCancel();
1230   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1231     HasCancel = OPFD->hasCancel();
1232   else if (const auto *OPFD =
1233                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1234     HasCancel = OPFD->hasCancel();
1235   else if (const auto *OPFD =
1236                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1237     HasCancel = OPFD->hasCancel();
1238 
1239   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1240   //       parallel region to make cancellation barriers work properly.
1241   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1242   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1243   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1244                                     HasCancel, OutlinedHelperName);
1245   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1246   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1247 }
1248 
1249 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1250   std::string Suffix = getName({"omp_outlined"});
1251   return (Name + Suffix).str();
1252 }
1253 
1254 std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1255   return getOutlinedHelperName(CGF.CurFn->getName());
1256 }
1257 
1258 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1259   std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1260   return (Name + Suffix).str();
1261 }
1262 
1263 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1264     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1265     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1266     const RegionCodeGenTy &CodeGen) {
1267   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1268   return emitParallelOrTeamsOutlinedFunction(
1269       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1270       CodeGen);
1271 }
1272 
1273 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1274     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1275     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1276     const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1280       CodeGen);
1281 }
1282 
/// Outlines the captured statement of a task/taskloop region. For untied
/// tasks, installs an action that re-enqueues the task via __kmpc_omp_task
/// between parts and reports the resulting number of parts through
/// NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback used between parts of an untied task: re-enqueue the task via
  // __kmpc_omp_task with the descriptor held in TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloops capture their body in a different region than plain tasks.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Does this (possibly combined) directive form carry a cancel construct?
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1329 
/// Creates the "service" insertion point used for lazily emitted thread-id /
/// location code in the current function: a removable placeholder
/// instruction placed either at the current insertion point or right after
/// the function's alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A bitcast of undef is a no-op instruction that can later be erased.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1345 
1346 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1347   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1348   if (Elem.second.ServiceInsertPt) {
1349     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1350     Elem.second.ServiceInsertPt = nullptr;
1351     Ptr->eraseFromParent();
1352   }
1353 }
1354 
1355 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1356                                                   SourceLocation Loc,
1357                                                   SmallString<128> &Buffer) {
1358   llvm::raw_svector_ostream OS(Buffer);
1359   // Build debug location
1360   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1361   OS << ";" << PLoc.getFilename() << ";";
1362   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1363     OS << FD->getQualifiedNameAsString();
1364   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1365   return OS.str();
1366 }
1367 
/// Builds the source-location (ident_t) argument used by OpenMP runtime
/// calls. Real source info is encoded only when the location is valid and
/// either EmitLoc is set or debug info is enabled; otherwise a default
/// location string is used.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  // The builder's getOrCreate* entry points reuse previously created
  // strings/ident structures.
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1392 
// Return the OpenMP global thread ID (gtid) value for the current function,
// reusing a cached load or outlined-region argument when possible and
// otherwise emitting a call to __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the thread-ID variable when it is safe to do so:
      // either exceptions cannot interfere, or the load happens in (or its
      // pointer was materialized in) a block where it is known to dominate
      // the current insertion point.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the dedicated service insertion point (near the
  // function entry) so the value is available throughout the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1462 
1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466     clearLocThreadIdInsertPt(CGF);
1467     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468   }
1469   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471       UDRMap.erase(D);
1472     FunctionUDRMap.erase(CGF.CurFn);
1473   }
1474   auto I = FunctionUDMMap.find(CGF.CurFn);
1475   if (I != FunctionUDMMap.end()) {
1476     for(const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
// Return the LLVM pointer type used for ident_t* runtime arguments.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1487 
1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489   if (!Kmpc_MicroTy) {
1490     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494   }
1495   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496 }
1497 
1498 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1499 convertDeviceClause(const VarDecl *VD) {
1500   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502   if (!DevTy)
1503     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504 
1505   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506   case OMPDeclareTargetDeclAttr::DT_Host:
1507     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508     break;
1509   case OMPDeclareTargetDeclAttr::DT_NoHost:
1510     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511     break;
1512   case OMPDeclareTargetDeclAttr::DT_Any:
1513     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514     break;
1515   default:
1516     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517     break;
1518   }
1519 }
1520 
1521 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1522 convertCaptureClause(const VarDecl *VD) {
1523   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525   if (!MapType)
1526     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530     break;
1531   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1533     break;
1534   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1535     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1536     break;
1537   default:
1538     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1539     break;
1540   }
1541 }
1542 
1543 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1544     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1545     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1546 
1547   auto FileInfoCallBack = [&]() {
1548     SourceManager &SM = CGM.getContext().getSourceManager();
1549     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1550 
1551     llvm::sys::fs::UniqueID ID;
1552     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1553       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1554     }
1555 
1556     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1557   };
1558 
1559   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1560 }
1561 
// Return the address to use for a 'declare target' variable, delegating the
// to/enter/link handling to the OMPIRBuilder; returns an invalid address
// when no special declare-target address is required.
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Callbacks so the builder can lazily query the global's address and
  // linkage only if it needs them.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  // Out-parameter for reference globals the builder may generate; not
  // inspected here.
  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  // Pointer-to-VD type in LLVM terms; also used as the element type of the
  // returned ConstantAddress.
  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  // A null result means the variable needs no declare-target indirection.
  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}
1587 
1588 llvm::Constant *
1589 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1590   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1591          !CGM.getContext().getTargetInfo().isTLSSupported());
1592   // Lookup the entry, lazily creating it if necessary.
1593   std::string Suffix = getName({"cache", ""});
1594   return OMPBuilder.getOrCreateInternalVariable(
1595       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1596 }
1597 
1598 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1599                                                 const VarDecl *VD,
1600                                                 Address VDAddr,
1601                                                 SourceLocation Loc) {
1602   if (CGM.getLangOpts().OpenMPUseTLS &&
1603       CGM.getContext().getTargetInfo().isTLSSupported())
1604     return VDAddr;
1605 
1606   llvm::Type *VarTy = VDAddr.getElementType();
1607   llvm::Value *Args[] = {
1608       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1609       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1610       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1611       getOrCreateThreadPrivateCache(VD)};
1612   return Address(
1613       CGF.EmitRuntimeCall(
1614           OMPBuilder.getOrCreateRuntimeFunction(
1615               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1616           Args),
1617       CGF.Int8Ty, VDAddr.getAlignment());
1618 }
1619 
1620 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1621     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1622     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1623   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1624   // library.
1625   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1626   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1627                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1628                       OMPLoc);
1629   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1630   // to register constructor/destructor for variable.
1631   llvm::Value *Args[] = {
1632       OMPLoc,
1633       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1634       Ctor, CopyCtor, Dtor};
1635   CGF.EmitRuntimeCall(
1636       OMPBuilder.getOrCreateRuntimeFunction(
1637           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1638       Args);
1639 }
1640 
// Emit (at most once per mangled name) the constructor/destructor helper
// functions for a threadprivate variable definition and register them with
// the runtime. Returns the generated initializer function when one must be
// added to the global init list (i.e. when no CGF is provided), otherwise
// nullptr. No-op when TLS-based threadprivate is in use.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only emit once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: void* pointing at this thread's copy of VD.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and emit the initializer into
      // it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: void* pointing at this thread's copy of VD.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor callbacks are passed to the runtime as typed nulls.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated global
      // initializer function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1760 
// Register the offload entry needed for an 'indirect' declare-target
// function: on the device a new global holding the function's address is
// created so the runtime can resolve indirect calls without altering the
// function's own visibility or linkage.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // Device side: a constant global initialized with the function address,
    // protected so the runtime can read it but it cannot be preempted.
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}
1795 
// Return the address of a compiler-generated ("artificial") threadprivate
// value identified by \p Name. Uses a TLS global when TLS is available;
// otherwise falls back to __kmpc_threadprivate_cached with a per-name cache
// global.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Internal global backing the artificial variable.
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: mark the global thread-local and return it directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache).
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
1828 
1829 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1830                                    const RegionCodeGenTy &ThenGen,
1831                                    const RegionCodeGenTy &ElseGen) {
1832   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1833 
1834   // If the condition constant folds and can be elided, try to avoid emitting
1835   // the condition and the dead arm of the if/else.
1836   bool CondConstant;
1837   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1838     if (CondConstant)
1839       ThenGen(CGF);
1840     else
1841       ElseGen(CGF);
1842     return;
1843   }
1844 
1845   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1846   // emit the conditional branch.
1847   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1848   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1849   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1850   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1851 
1852   // Emit the 'then' code.
1853   CGF.EmitBlock(ThenBlock);
1854   ThenGen(CGF);
1855   CGF.EmitBranch(ContBlock);
1856   // Emit the 'else' code if present.
1857   // There is no need to emit line number for unconditional branch.
1858   (void)ApplyDebugLocation::CreateEmpty(CGF);
1859   CGF.EmitBlock(ElseBlock);
1860   ElseGen(CGF);
1861   // There is no need to emit line number for unconditional branch.
1862   (void)ApplyDebugLocation::CreateEmpty(CGF);
1863   CGF.EmitBranch(ContBlock);
1864   // Emit the continuation block for code after the if.
1865   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1866 }
1867 
// Emit a call to a '#pragma omp parallel' outlined function: either via
// __kmpc_fork_call, or — when the if-clause evaluates false — as a
// serialized region bracketed by __kmpc_serialized_parallel /
// __kmpc_end_serialized_parallel. NumThreads is accepted but not used here.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With no if-clause the parallel call is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
1939 
1940 // If we're inside an (outlined) parallel region, use the region info's
1941 // thread-ID variable (it is passed in a first argument of the outlined function
1942 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1943 // regular serial code region, get thread ID by calling kmp_int32
1944 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1945 // return the address of that temp.
1946 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1947                                              SourceLocation Loc) {
1948   if (auto *OMPRegionInfo =
1949           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1950     if (OMPRegionInfo->getThreadIDVariable())
1951       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1952 
1953   llvm::Value *ThreadID = getThreadID(CGF, Loc);
1954   QualType Int32Ty =
1955       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1956   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1957   CGF.EmitStoreOfScalar(ThreadID,
1958                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1959 
1960   return ThreadIDTemp;
1961 }
1962 
1963 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1964   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1965   std::string Name = getName({Prefix, "var"});
1966   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1967 }
1968 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Wraps a region body in an enter/exit pair of runtime calls. When
/// \p Conditional is set, the enter call's result guards the region:
/// the body is emitted inside an if on the enter result, and Done()
/// must be called after the region to close the branch.
class CommonActionTy final : public PrePostActionTy {
  // Runtime function (and its arguments) called before the region.
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime function (and its arguments) called after the region.
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // Whether the enter call's result conditionally guards the region body.
  bool Conditional;
  // Continuation block; only set (by Enter) when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Branch into the region body only when the enter call returned
      // non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional region opened by Enter(); callers invoke this only
  // when the action was constructed with Conditional=true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2007 
2008 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2009                                          StringRef CriticalName,
2010                                          const RegionCodeGenTy &CriticalOpGen,
2011                                          SourceLocation Loc, const Expr *Hint) {
2012   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2013   // CriticalOpGen();
2014   // __kmpc_end_critical(ident_t *, gtid, Lock);
2015   // Prepare arguments and build a call to __kmpc_critical
2016   if (!CGF.HaveInsertPoint())
2017     return;
2018   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2019                          getCriticalRegionLock(CriticalName)};
2020   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2021                                                 std::end(Args));
2022   if (Hint) {
2023     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2024         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2025   }
2026   CommonActionTy Action(
2027       OMPBuilder.getOrCreateRuntimeFunction(
2028           CGM.getModule(),
2029           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2030       EnterArgs,
2031       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2032                                             OMPRTL___kmpc_end_critical),
2033       Args);
2034   CriticalOpGen.setAction(Action);
2035   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2036 }
2037 
2038 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2039                                        const RegionCodeGenTy &MasterOpGen,
2040                                        SourceLocation Loc) {
2041   if (!CGF.HaveInsertPoint())
2042     return;
2043   // if(__kmpc_master(ident_t *, gtid)) {
2044   //   MasterOpGen();
2045   //   __kmpc_end_master(ident_t *, gtid);
2046   // }
2047   // Prepare arguments and build a call to __kmpc_master
2048   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2049   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2050                             CGM.getModule(), OMPRTL___kmpc_master),
2051                         Args,
2052                         OMPBuilder.getOrCreateRuntimeFunction(
2053                             CGM.getModule(), OMPRTL___kmpc_end_master),
2054                         Args,
2055                         /*Conditional=*/true);
2056   MasterOpGen.setAction(Action);
2057   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2058   Action.Done(CGF);
2059 }
2060 
2061 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2062                                        const RegionCodeGenTy &MaskedOpGen,
2063                                        SourceLocation Loc, const Expr *Filter) {
2064   if (!CGF.HaveInsertPoint())
2065     return;
2066   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2067   //   MaskedOpGen();
2068   //   __kmpc_end_masked(iden_t *, gtid);
2069   // }
2070   // Prepare arguments and build a call to __kmpc_masked
2071   llvm::Value *FilterVal = Filter
2072                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2073                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2074   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2075                          FilterVal};
2076   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2077                             getThreadID(CGF, Loc)};
2078   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2079                             CGM.getModule(), OMPRTL___kmpc_masked),
2080                         Args,
2081                         OMPBuilder.getOrCreateRuntimeFunction(
2082                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2083                         ArgsEnd,
2084                         /*Conditional=*/true);
2085   MaskedOpGen.setAction(Action);
2086   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2087   Action.Done(CGF);
2088 }
2089 
2090 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2091                                         SourceLocation Loc) {
2092   if (!CGF.HaveInsertPoint())
2093     return;
2094   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2095     OMPBuilder.createTaskyield(CGF.Builder);
2096   } else {
2097     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2098     llvm::Value *Args[] = {
2099         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2100         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2101     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2102                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2103                         Args);
2104   }
2105 
2106   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2107     Region->emitUntiedSwitch(CGF);
2108 }
2109 
2110 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2111                                           const RegionCodeGenTy &TaskgroupOpGen,
2112                                           SourceLocation Loc) {
2113   if (!CGF.HaveInsertPoint())
2114     return;
2115   // __kmpc_taskgroup(ident_t *, gtid);
2116   // TaskgroupOpGen();
2117   // __kmpc_end_taskgroup(ident_t *, gtid);
2118   // Prepare arguments and build a call to __kmpc_taskgroup
2119   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2120   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2121                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2122                         Args,
2123                         OMPBuilder.getOrCreateRuntimeFunction(
2124                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2125                         Args);
2126   TaskgroupOpGen.setAction(Action);
2127   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2128 }
2129 
2130 /// Given an array of pointers to variables, project the address of a
2131 /// given variable.
2132 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2133                                       unsigned Index, const VarDecl *Var) {
2134   // Pull out the pointer to the variable.
2135   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2136   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2137 
2138   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2139   return Address(
2140       CGF.Builder.CreateBitCast(
2141           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2142       ElemTy, CGF.getContext().getDeclAlign(Var));
2143 }
2144 
/// Emits a helper function with the signature
///   void copy_func(void *LHSArg, void *RHSArg);
/// that performs, element-wise, the copyprivate assignments between two
/// arrays of variable addresses. Both arguments point to arrays of
/// \p ArgsElemType (one void* slot per copyprivate variable); the function
/// copies from the RHS array's variables into the LHS array's variables using
/// the provided assignment expressions.
/// NOTE(review): the only visible caller (emitSingleRegion) forwards its
/// SrcExprs/DstExprs into the (DestExprs, SrcExprs) parameters here — the
/// apparent swap is compensated elsewhere; confirm against the external
/// caller before renaming anything.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced by the __kmpc_copyprivate
  // call emitted in this translation unit.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy is typed by the copyprivate variable itself; the assignment
    // expression handles user-defined copy assignment if needed.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2200 
/// Emits the OpenMP 'single' construct, optionally with copyprivate support:
///   int32 did_it = 0;
///   if (__kmpc_single(ident_t *, gtid)) {
///     SingleOpGen();
///     __kmpc_end_single(ident_t *, gtid);
///     did_it = 1;
///   }
///   __kmpc_copyprivate(...);   // only when copyprivate variables exist
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four clause arrays describe the same variables pairwise.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it tells the runtime which thread actually executed the single region
  // and therefore owns the values to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region, so only the thread
    // that ran the single body sets it)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are forwarded into the callee's
    // (DestExprs, SrcExprs) parameters; this relies on the external caller
    // supplying them in the matching order — confirm against CGStmtOpenMP
    // before touching the ordering.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2287 
2288 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2289                                         const RegionCodeGenTy &OrderedOpGen,
2290                                         SourceLocation Loc, bool IsThreads) {
2291   if (!CGF.HaveInsertPoint())
2292     return;
2293   // __kmpc_ordered(ident_t *, gtid);
2294   // OrderedOpGen();
2295   // __kmpc_end_ordered(ident_t *, gtid);
2296   // Prepare arguments and build a call to __kmpc_ordered
2297   if (IsThreads) {
2298     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2299     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300                               CGM.getModule(), OMPRTL___kmpc_ordered),
2301                           Args,
2302                           OMPBuilder.getOrCreateRuntimeFunction(
2303                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2304                           Args);
2305     OrderedOpGen.setAction(Action);
2306     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307     return;
2308   }
2309   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2310 }
2311 
2312 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2313   unsigned Flags;
2314   if (Kind == OMPD_for)
2315     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2316   else if (Kind == OMPD_sections)
2317     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2318   else if (Kind == OMPD_single)
2319     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2320   else if (Kind == OMPD_barrier)
2321     Flags = OMP_IDENT_BARRIER_EXPL;
2322   else
2323     Flags = OMP_IDENT_BARRIER_IMPL;
2324   return Flags;
2325 }
2326 
2327 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2328     CodeGenFunction &CGF, const OMPLoopDirective &S,
2329     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2330   // Check if the loop directive is actually a doacross loop directive. In this
2331   // case choose static, 1 schedule.
2332   if (llvm::any_of(
2333           S.getClausesOfKind<OMPOrderedClause>(),
2334           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2335     ScheduleKind = OMPC_SCHEDULE_static;
2336     // Chunk size is 1 in this case.
2337     llvm::APInt ChunkSize(32, 1);
2338     ChunkExpr = IntegerLiteral::Create(
2339         CGF.getContext(), ChunkSize,
2340         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2341         SourceLocation());
2342   }
2343 }
2344 
/// Emits a barrier for the construct \p Kind. Uses the OpenMPIRBuilder when
/// enabled; otherwise emits __kmpc_barrier, or __kmpc_cancel_barrier plus a
/// cancellation check when the enclosing region is cancellable.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags tell the runtime which construct the barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // A barrier in a cancellable region is itself a cancellation point: use
    // the cancel-aware entry point and branch on its result.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2394 
2395 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2396                                     Expr *ME, bool IsFatal) {
2397   llvm::Value *MVL =
2398       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2399          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2400   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2401   // *message)
2402   llvm::Value *Args[] = {
2403       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2404       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2405       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2406   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2407                           CGM.getModule(), OMPRTL___kmpc_error),
2408                       Args);
2409 }
2410 
2411 /// Map the OpenMP loop schedule to the runtime enumeration.
2412 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2413                                           bool Chunked, bool Ordered) {
2414   switch (ScheduleKind) {
2415   case OMPC_SCHEDULE_static:
2416     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2417                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2418   case OMPC_SCHEDULE_dynamic:
2419     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2420   case OMPC_SCHEDULE_guided:
2421     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2422   case OMPC_SCHEDULE_runtime:
2423     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2424   case OMPC_SCHEDULE_auto:
2425     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2426   case OMPC_SCHEDULE_unknown:
2427     assert(!Chunked && "chunk was specified but schedule kind not known");
2428     return Ordered ? OMP_ord_static : OMP_sch_static;
2429   }
2430   llvm_unreachable("Unexpected runtime schedule");
2431 }
2432 
2433 /// Map the OpenMP distribute schedule to the runtime enumeration.
2434 static OpenMPSchedType
2435 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2436   // only static is allowed for dist_schedule
2437   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2438 }
2439 
2440 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2441                                          bool Chunked) const {
2442   OpenMPSchedType Schedule =
2443       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2444   return Schedule == OMP_sch_static;
2445 }
2446 
2447 bool CGOpenMPRuntime::isStaticNonchunked(
2448     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2449   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2450   return Schedule == OMP_dist_sch_static;
2451 }
2452 
2453 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2454                                       bool Chunked) const {
2455   OpenMPSchedType Schedule =
2456       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2457   return Schedule == OMP_sch_static_chunked;
2458 }
2459 
2460 bool CGOpenMPRuntime::isStaticChunked(
2461     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2462   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2463   return Schedule == OMP_dist_sch_static_chunked;
2464 }
2465 
2466 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2467   OpenMPSchedType Schedule =
2468       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2469   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2470   return Schedule != OMP_sch_static;
2471 }
2472 
2473 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2474                                   OpenMPScheduleClauseModifier M1,
2475                                   OpenMPScheduleClauseModifier M2) {
2476   int Modifier = 0;
2477   switch (M1) {
2478   case OMPC_SCHEDULE_MODIFIER_monotonic:
2479     Modifier = OMP_sch_modifier_monotonic;
2480     break;
2481   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2482     Modifier = OMP_sch_modifier_nonmonotonic;
2483     break;
2484   case OMPC_SCHEDULE_MODIFIER_simd:
2485     if (Schedule == OMP_sch_static_chunked)
2486       Schedule = OMP_sch_static_balanced_chunked;
2487     break;
2488   case OMPC_SCHEDULE_MODIFIER_last:
2489   case OMPC_SCHEDULE_MODIFIER_unknown:
2490     break;
2491   }
2492   switch (M2) {
2493   case OMPC_SCHEDULE_MODIFIER_monotonic:
2494     Modifier = OMP_sch_modifier_monotonic;
2495     break;
2496   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2497     Modifier = OMP_sch_modifier_nonmonotonic;
2498     break;
2499   case OMPC_SCHEDULE_MODIFIER_simd:
2500     if (Schedule == OMP_sch_static_chunked)
2501       Schedule = OMP_sch_static_balanced_chunked;
2502     break;
2503   case OMPC_SCHEDULE_MODIFIER_last:
2504   case OMPC_SCHEDULE_MODIFIER_unknown:
2505     break;
2506   }
2507   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2508   // If the static schedule kind is specified or if the ordered clause is
2509   // specified, and if the nonmonotonic modifier is not specified, the effect is
2510   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2511   // modifier is specified, the effect is as if the nonmonotonic modifier is
2512   // specified.
2513   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2514     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2515           Schedule == OMP_sch_static_balanced_chunked ||
2516           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2517           Schedule == OMP_dist_sch_static_chunked ||
2518           Schedule == OMP_dist_sch_static))
2519       Modifier = OMP_sch_modifier_nonmonotonic;
2520   }
2521   return Schedule | Modifier;
2522 }
2523 
/// Emits the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop. \p IVSize/\p IVSigned select the 4/8-byte (un)signed
/// runtime variant; \p DispatchValues carries the loop bounds and chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through emitForStaticInit instead, except when the
  // loop is ordered (ordered loops always use dynamic dispatch).
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2557 
2558 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2559                                             SourceLocation Loc) {
2560   if (!CGF.HaveInsertPoint())
2561     return;
2562   // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2563   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2564   CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2565 }
2566 
/// Emits the actual __kmpc_for_static_init_* call shared by worksharing-loop
/// and distribute codegen. \p Values carries the bound/stride addresses the
/// runtime fills in; \p Schedule must already be one of the static variants.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered static loops are handled by the dispatch codepath, not here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A chunk may only be absent for the non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2615 
2616 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2617                                         SourceLocation Loc,
2618                                         OpenMPDirectiveKind DKind,
2619                                         const OpenMPScheduleTy &ScheduleKind,
2620                                         const StaticRTInput &Values) {
2621   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2622       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2623   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2624          "Expected loop-based or sections-based directive.");
2625   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2626                                              isOpenMPLoopDirective(DKind)
2627                                                  ? OMP_IDENT_WORK_LOOP
2628                                                  : OMP_IDENT_WORK_SECTIONS);
2629   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2630   llvm::FunctionCallee StaticInitFunction =
2631       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2632                                              false);
2633   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2634   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2635                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2636 }
2637 
2638 void CGOpenMPRuntime::emitDistributeStaticInit(
2639     CodeGenFunction &CGF, SourceLocation Loc,
2640     OpenMPDistScheduleClauseKind SchedKind,
2641     const CGOpenMPRuntime::StaticRTInput &Values) {
2642   OpenMPSchedType ScheduleNum =
2643       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2644   llvm::Value *UpdatedLocation =
2645       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2646   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2647   llvm::FunctionCallee StaticInitFunction;
2648   bool isGPUDistribute =
2649       CGM.getLangOpts().OpenMPIsTargetDevice &&
2650       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2651   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2652       Values.IVSize, Values.IVSigned, isGPUDistribute);
2653 
2654   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2655                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2656                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2657 }
2658 
2659 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2660                                           SourceLocation Loc,
2661                                           OpenMPDirectiveKind DKind) {
2662   assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2663           DKind == OMPD_sections) &&
2664          "Expected distribute, for, or sections directive kind");
2665   if (!CGF.HaveInsertPoint())
2666     return;
2667   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2668   llvm::Value *Args[] = {
2669       emitUpdateLocation(CGF, Loc,
2670                          isOpenMPDistributeDirective(DKind) ||
2671                                  (DKind == OMPD_target_teams_loop)
2672                              ? OMP_IDENT_WORK_DISTRIBUTE
2673                          : isOpenMPLoopDirective(DKind)
2674                              ? OMP_IDENT_WORK_LOOP
2675                              : OMP_IDENT_WORK_SECTIONS),
2676       getThreadID(CGF, Loc)};
2677   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2678   if (isOpenMPDistributeDirective(DKind) &&
2679       CGM.getLangOpts().OpenMPIsTargetDevice &&
2680       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2681     CGF.EmitRuntimeCall(
2682         OMPBuilder.getOrCreateRuntimeFunction(
2683             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2684         Args);
2685   else
2686     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2687                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2688                         Args);
2689 }
2690 
2691 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2692                                                  SourceLocation Loc,
2693                                                  unsigned IVSize,
2694                                                  bool IVSigned) {
2695   if (!CGF.HaveInsertPoint())
2696     return;
2697   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2698   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2699   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2700                       Args);
2701 }
2702 
2703 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2704                                           SourceLocation Loc, unsigned IVSize,
2705                                           bool IVSigned, Address IL,
2706                                           Address LB, Address UB,
2707                                           Address ST) {
2708   // Call __kmpc_dispatch_next(
2709   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2710   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2711   //          kmp_int[32|64] *p_stride);
2712   llvm::Value *Args[] = {
2713       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2714       IL.emitRawPointer(CGF), // &isLastIter
2715       LB.emitRawPointer(CGF), // &Lower
2716       UB.emitRawPointer(CGF), // &Upper
2717       ST.emitRawPointer(CGF)  // &Stride
2718   };
2719   llvm::Value *Call = CGF.EmitRuntimeCall(
2720       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2721   return CGF.EmitScalarConversion(
2722       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2723       CGF.getContext().BoolTy, Loc);
2724 }
2725 
2726 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2727                                            llvm::Value *NumThreads,
2728                                            SourceLocation Loc) {
2729   if (!CGF.HaveInsertPoint())
2730     return;
2731   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2732   llvm::Value *Args[] = {
2733       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2734       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2735   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2736                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2737                       Args);
2738 }
2739 
2740 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2741                                          ProcBindKind ProcBind,
2742                                          SourceLocation Loc) {
2743   if (!CGF.HaveInsertPoint())
2744     return;
2745   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2746   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2747   llvm::Value *Args[] = {
2748       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2749       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2750   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2751                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2752                       Args);
2753 }
2754 
2755 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2756                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2757   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2758     OMPBuilder.createFlush(CGF.Builder);
2759   } else {
2760     if (!CGF.HaveInsertPoint())
2761       return;
2762     // Build call void __kmpc_flush(ident_t *loc)
2763     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2764                             CGM.getModule(), OMPRTL___kmpc_flush),
2765                         emitUpdateLocation(CGF, Loc));
2766   }
2767 }
2768 
namespace {
/// Indexes of fields for type kmp_task_t.
/// The order of enumerators must stay in sync with the field order built in
/// createKmpTaskTRecordDecl() below, since the enumerators are used as
/// std::next() offsets into the record's field list.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2794 
// Delegates emission of the offload-entry metadata to the OMPIRBuilder,
// providing a callback that turns the builder's error reports into proper
// clang diagnostics.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  // Error-report callback: maps an entry's (DeviceID, FileID, Line) back to a
  // SourceLocation (when one exists) and emits a custom diagnostic.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      // Scan the SourceManager's known files for one whose unique ID matches
      // the entry's device/file IDs, then translate the recorded line into a
      // location usable for diagnostics.
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      // No source location is recoverable for link errors; report without one.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
2844 
2845 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2846   if (!KmpRoutineEntryPtrTy) {
2847     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2848     ASTContext &C = CGM.getContext();
2849     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2850     FunctionProtoType::ExtProtoInfo EPI;
2851     KmpRoutineEntryPtrQTy = C.getPointerType(
2852         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2853     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2854   }
2855 }
2856 
namespace {
/// Bundles the AST entities that describe one privatized variable in a
/// task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for local privates: only the original variable is recorded,
  /// the other members stay null (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression to the original variable (null for local privates).
  const Expr *OriginalRef = nullptr;
  // The original variable being privatized.
  const VarDecl *Original = nullptr;
  // The generated private copy (null for local privates).
  const VarDecl *PrivateCopy = nullptr;
  // Helper variable used when initializing elements of a private copy (null
  // for local privates and plain privates).
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry describes a private local, i.e. it was built with
  /// the single-argument constructor above.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Required alignment paired with the private-variable description.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
2874 
2875 static bool isAllocatableDecl(const VarDecl *VD) {
2876   const VarDecl *CVD = VD->getCanonicalDecl();
2877   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2878     return false;
2879   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2880   // Use the default allocation.
2881   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2882            !AA->getAllocator());
2883 }
2884 
2885 static RecordDecl *
2886 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2887   if (!Privates.empty()) {
2888     ASTContext &C = CGM.getContext();
2889     // Build struct .kmp_privates_t. {
2890     //         /*  private vars  */
2891     //       };
2892     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2893     RD->startDefinition();
2894     for (const auto &Pair : Privates) {
2895       const VarDecl *VD = Pair.second.Original;
2896       QualType Type = VD->getType().getNonReferenceType();
2897       // If the private variable is a local variable with lvalue ref type,
2898       // allocate the pointer instead of the pointee type.
2899       if (Pair.second.isLocalPrivate()) {
2900         if (VD->getType()->isLValueReferenceType())
2901           Type = C.getPointerType(Type);
2902         if (isAllocatableDecl(VD))
2903           Type = C.getPointerType(Type);
2904       }
2905       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2906       if (VD->hasAttrs()) {
2907         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2908              E(VD->getAttrs().end());
2909              I != E; ++I)
2910           FD->addAttr(*I);
2911       }
2912     }
2913     RD->completeDefinition();
2914     return RD;
2915   }
2916   return nullptr;
2917 }
2918 
2919 static RecordDecl *
2920 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2921                          QualType KmpInt32Ty,
2922                          QualType KmpRoutineEntryPointerQTy) {
2923   ASTContext &C = CGM.getContext();
2924   // Build struct kmp_task_t {
2925   //         void *              shareds;
2926   //         kmp_routine_entry_t routine;
2927   //         kmp_int32           part_id;
2928   //         kmp_cmplrdata_t data1;
2929   //         kmp_cmplrdata_t data2;
2930   // For taskloops additional fields:
2931   //         kmp_uint64          lb;
2932   //         kmp_uint64          ub;
2933   //         kmp_int64           st;
2934   //         kmp_int32           liter;
2935   //         void *              reductions;
2936   //       };
2937   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2938   UD->startDefinition();
2939   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2940   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2941   UD->completeDefinition();
2942   QualType KmpCmplrdataTy = C.getRecordType(UD);
2943   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2944   RD->startDefinition();
2945   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2946   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2947   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2948   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2949   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2950   if (isOpenMPTaskLoopDirective(Kind)) {
2951     QualType KmpUInt64Ty =
2952         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2953     QualType KmpInt64Ty =
2954         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2955     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2956     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2957     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2958     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2959     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2960   }
2961   RD->completeDefinition();
2962   return RD;
2963 }
2964 
2965 static RecordDecl *
2966 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2967                                      ArrayRef<PrivateDataTy> Privates) {
2968   ASTContext &C = CGM.getContext();
2969   // Build struct kmp_task_t_with_privates {
2970   //         kmp_task_t task_data;
2971   //         .kmp_privates_t. privates;
2972   //       };
2973   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2974   RD->startDefinition();
2975   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2976   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2977     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2978   RD->completeDefinition();
2979   return RD;
2980 }
2981 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the entry signature: kmp_int32 (kmp_int32 gtid,
  // kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The first field of kmp_task_t_with_privates is the kmp_task_t task data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the outlined function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the callee expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is optional; pass a null pointer when it is absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass the loop bounds, stride, last-iteration flag,
  // and reduction data loaded from the trailing kmp_task_t fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The task entry always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3096 
/// Emit the task destructor thunk: a function with the task-entry signature
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) that runs the destructor
/// of every destructible field in tt's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Build the signature: kmp_int32 (kmp_int32 gtid,
  // kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destroy for every field whose type has a non-trivial
  // destruction kind; the cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3145 
3146 /// Emit a privates mapping function for correct handling of private and
3147 /// firstprivate variables.
3148 /// \code
3149 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3150 /// **noalias priv1,...,  <tyn> **noalias privn) {
3151 ///   *priv1 = &.privates.priv1;
3152 ///   ...;
3153 ///   *privn = &.privates.privn;
3154 /// }
3155 /// \endcode
3156 static llvm::Value *
3157 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3158                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3159                                ArrayRef<PrivateDataTy> Privates) {
3160   ASTContext &C = CGM.getContext();
3161   FunctionArgList Args;
3162   ImplicitParamDecl TaskPrivatesArg(
3163       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3164       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3165       ImplicitParamKind::Other);
3166   Args.push_back(&TaskPrivatesArg);
3167   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3168   unsigned Counter = 1;
3169   for (const Expr *E : Data.PrivateVars) {
3170     Args.push_back(ImplicitParamDecl::Create(
3171         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172         C.getPointerType(C.getPointerType(E->getType()))
3173             .withConst()
3174             .withRestrict(),
3175         ImplicitParamKind::Other));
3176     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3177     PrivateVarsPos[VD] = Counter;
3178     ++Counter;
3179   }
3180   for (const Expr *E : Data.FirstprivateVars) {
3181     Args.push_back(ImplicitParamDecl::Create(
3182         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3183         C.getPointerType(C.getPointerType(E->getType()))
3184             .withConst()
3185             .withRestrict(),
3186         ImplicitParamKind::Other));
3187     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3188     PrivateVarsPos[VD] = Counter;
3189     ++Counter;
3190   }
3191   for (const Expr *E : Data.LastprivateVars) {
3192     Args.push_back(ImplicitParamDecl::Create(
3193         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3194         C.getPointerType(C.getPointerType(E->getType()))
3195             .withConst()
3196             .withRestrict(),
3197         ImplicitParamKind::Other));
3198     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3199     PrivateVarsPos[VD] = Counter;
3200     ++Counter;
3201   }
3202   for (const VarDecl *VD : Data.PrivateLocals) {
3203     QualType Ty = VD->getType().getNonReferenceType();
3204     if (VD->getType()->isLValueReferenceType())
3205       Ty = C.getPointerType(Ty);
3206     if (isAllocatableDecl(VD))
3207       Ty = C.getPointerType(Ty);
3208     Args.push_back(ImplicitParamDecl::Create(
3209         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3210         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3211         ImplicitParamKind::Other));
3212     PrivateVarsPos[VD] = Counter;
3213     ++Counter;
3214   }
3215   const auto &TaskPrivatesMapFnInfo =
3216       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3217   llvm::FunctionType *TaskPrivatesMapTy =
3218       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3219   std::string Name =
3220       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3221   auto *TaskPrivatesMap = llvm::Function::Create(
3222       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3223       &CGM.getModule());
3224   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3225                                     TaskPrivatesMapFnInfo);
3226   if (CGM.getLangOpts().Optimize) {
3227     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3228     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3229     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3230   }
3231   CodeGenFunction CGF(CGM);
3232   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3233                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3234 
3235   // *privi = &.privates.privi;
3236   LValue Base = CGF.EmitLoadOfPointerLValue(
3237       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3238       TaskPrivatesArg.getType()->castAs<PointerType>());
3239   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3240   Counter = 0;
3241   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3242     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3243     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3244     LValue RefLVal =
3245         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3246     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3247         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3248     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3249     ++Counter;
3250   }
3251   CGF.FinishFunction();
3252   return TaskPrivatesMap;
3253 }
3254 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block (may be
///        invalid; it is only used when a copy source is needed).
/// \param TDBase Base lvalue of the kmp_task_t_with_privates object.
/// \param KmpTaskTWithPrivatesQTyRD Record decl of that object.
/// \param Privates Per-variable privatization info, in privates-field order.
/// \param ForDup True when emitting from the taskloop 'dup' function, which
///        copies from an existing task's shareds instead of the captures.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates record's fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), only non-trivial constructor initializers
    // are re-run; everything else is skipped here.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: locate the shared source value to initialize from.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Copy from the corresponding field of the source task's shareds,
          // re-aligned to the declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or block: emit through the capture.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: run the initializer with the shared value
          // substituted for the element-init helper variable.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the variable's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3373 
3374 /// Check if duplication function is required for taskloops.
3375 static bool checkInitIsRequired(CodeGenFunction &CGF,
3376                                 ArrayRef<PrivateDataTy> Privates) {
3377   bool InitRequired = false;
3378   for (const PrivateDataTy &Pair : Privates) {
3379     if (Pair.second.isLocalPrivate())
3380       continue;
3381     const VarDecl *VD = Pair.second.PrivateCopy;
3382     const Expr *Init = VD->getAnyInitializer();
3383     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3384                                     !CGF.isTrivialInitializer(Init));
3385     if (InitRequired)
3386       break;
3387   }
3388   return InitRequired;
3389 }
3390 
3391 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
///
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the task record that
/// embeds the privates; used for both the destination and source arguments.
/// \param WithLastIter If true, also copy the 'lastpriv' argument into the
/// destination task's last-iteration field.
/// \returns The newly created internal-linkage duplication function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the three implicit parameters: destination task, source task and
  // the lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Load the destination task record through the DstArg pointer.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  // The source shareds pointer is only computed when there are firstprivates
  // to copy from; otherwise it stays invalid.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3470 
3471 /// Checks if destructor function is required to be generated.
3472 /// \return true if cleanups are required, false otherwise.
3473 static bool
3474 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3475                          ArrayRef<PrivateDataTy> Privates) {
3476   for (const PrivateDataTy &P : Privates) {
3477     if (P.second.isLocalPrivate())
3478       continue;
3479     QualType Ty = P.second.Original->getType().getNonReferenceType();
3480     if (Ty.isDestructedType())
3481       return true;
3482   }
3483   return false;
3484 }
3485 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator variables and their
/// internal counters and emits the header of one loop per iterator
/// (zero-initialized counter, bounds check, and the iterator update from the
/// counter). The destructor emits the matching latch blocks in reverse order
/// (counter increment, branch back to the loop header, exit block), so any
/// code emitted while the scope is alive becomes the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations for the loop header ("iter.cont") and the
  // loop exit ("iter.exit"); filled by the constructor, consumed by the
  // destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// \param E Iterator expression to expand; may be null, in which case the
  /// scope is a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound before privatization so it is computed in
      // the enclosing scope.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned comparison depending on the counter type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops from the innermost to the outermost one.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3561 
/// Computes the base pointer and the size in bytes of the storage referenced
/// by expression \p E. For array-shaping expressions the size is the element
/// size multiplied by all dimensions; for array sections it is the distance
/// from the section's base to one past its upper bound; otherwise it is the
/// size of the expression's type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For ([d0][d1]...)base, the pointer is the shaped base itself.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(pointee) * d0 * d1 * ...; dimensions are widened to
    // size_t before the no-unsigned-wrap multiply.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size = (&upper_bound + 1) - &base, computed on pointer-sized integers.
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
3597 
/// Builds kmp_task_affinity_info_t type, if it is not built yet, using a
/// 32-bit unsigned integer as the flags field.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    // Fields: intptr_t base_addr; size_t len; uint32 flags;
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
3612 
/// Allocates a task object via __kmpc_omp_task_alloc (or the target variant)
/// and fills in its kmp_task_t fields: copies the captured shareds, emits the
/// initialization of private copies, registers affinity/detach data and sets
/// the destructor and priority slots. Returns the handles needed by callers
/// to emit the actual task/taskloop launch.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the per-element init variable used to
  // copy from the original.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Larger alignments first; stable sort keeps declaration order among
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separate cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function pointer type is taken from the task entry's
  // fourth parameter.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  // The destructors flag is only set when some private copy actually has a
  // non-trivially-destructible type.
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (final clause condition); select it
  // dynamically when a condition value is present.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime-computed count; plain
    // clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-driven entries need a runtime index; keep it in a memory slot
    // so the generated loops can update it.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a duplication function when there are
    // lastprivates or non-trivially-initialized privates.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4001 
4002 /// Translates internal dependency kind into the runtime kind.
4003 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4004   RTLDependenceKindTy DepKind;
4005   switch (K) {
4006   case OMPC_DEPEND_in:
4007     DepKind = RTLDependenceKindTy::DepIn;
4008     break;
4009   // Out and InOut dependencies must use the same code.
4010   case OMPC_DEPEND_out:
4011   case OMPC_DEPEND_inout:
4012     DepKind = RTLDependenceKindTy::DepInOut;
4013     break;
4014   case OMPC_DEPEND_mutexinoutset:
4015     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4016     break;
4017   case OMPC_DEPEND_inoutset:
4018     DepKind = RTLDependenceKindTy::DepInOutSet;
4019     break;
4020   case OMPC_DEPEND_outallmemory:
4021     DepKind = RTLDependenceKindTy::DepOmpAllMem;
4022     break;
4023   case OMPC_DEPEND_source:
4024   case OMPC_DEPEND_sink:
4025   case OMPC_DEPEND_depobj:
4026   case OMPC_DEPEND_inoutallmemory:
4027   case OMPC_DEPEND_unknown:
4028     llvm_unreachable("Unknown task dependence type");
4029   }
4030   return DepKind;
4031 }
4032 
4033 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4034 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4035                            QualType &FlagsTy) {
4036   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4037   if (KmpDependInfoTy.isNull()) {
4038     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4039     KmpDependInfoRD->startDefinition();
4040     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4041     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4042     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4043     KmpDependInfoRD->completeDefinition();
4044     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4045   }
4046 }
4047 
/// Returns {number of elements, lvalue of the first element} for the
/// kmp_depend_info array held by a depobj variable.
///
/// The depobj variable stores a pointer to the first "real" element of the
/// array; the record at index -1 is a header whose base_addr field holds the
/// element count (written by emitDepobjDependClause below).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Ensure the kmp_depend_info record type exists.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Load the stored pointer to the first array element.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one record to reach the header element.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4074 
/// Fills one kmp_depend_info record per expression in Data.DepExprs into
/// DependenciesArray, starting at the position described by \p Pos.
///
/// \param Pos Either a compile-time counter (unsigned*) when the number of
///        dependencies is statically known, or an in-memory counter (LValue*)
///        when iterators make the count dynamic. The counter is advanced by
///        one per emitted record.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Make iterator variables (if any) visible while dependency addresses are
  // computed.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    // Address the record at the current position, either by constant index
    // or by a runtime index loaded from the counter.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the position counter: in-place for the static counter,
    // load/add/store for the in-memory one.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4145 
4146 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4147     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4148     const OMPTaskDataTy::DependData &Data) {
4149   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4150          "Expected depobj dependency kind.");
4151   SmallVector<llvm::Value *, 4> Sizes;
4152   SmallVector<LValue, 4> SizeLVals;
4153   ASTContext &C = CGF.getContext();
4154   {
4155     OMPIteratorGeneratorScope IteratorScope(
4156         CGF, cast_or_null<OMPIteratorExpr>(
4157                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4158                                    : nullptr));
4159     for (const Expr *E : Data.DepExprs) {
4160       llvm::Value *NumDeps;
4161       LValue Base;
4162       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4163       std::tie(NumDeps, Base) =
4164           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4165       LValue NumLVal = CGF.MakeAddrLValue(
4166           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4167           C.getUIntPtrType());
4168       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4169                               NumLVal.getAddress());
4170       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4171       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4172       CGF.EmitStoreOfScalar(Add, NumLVal);
4173       SizeLVals.push_back(NumLVal);
4174     }
4175   }
4176   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4177     llvm::Value *Size =
4178         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4179     Sizes.push_back(Size);
4180   }
4181   return Sizes;
4182 }
4183 
4184 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4185                                          QualType &KmpDependInfoTy,
4186                                          LValue PosLVal,
4187                                          const OMPTaskDataTy::DependData &Data,
4188                                          Address DependenciesArray) {
4189   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4190          "Expected depobj dependency kind.");
4191   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4192   {
4193     OMPIteratorGeneratorScope IteratorScope(
4194         CGF, cast_or_null<OMPIteratorExpr>(
4195                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4196                                    : nullptr));
4197     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4198       const Expr *E = Data.DepExprs[I];
4199       llvm::Value *NumDeps;
4200       LValue Base;
4201       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4202       std::tie(NumDeps, Base) =
4203           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4204 
4205       // memcopy dependency data.
4206       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4207           ElSize,
4208           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4209       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4210       Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4211       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4212 
4213       // Increase pos.
4214       // pos += size;
4215       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4216       CGF.EmitStoreOfScalar(Add, PosLVal);
4217     }
4218   }
4219 }
4220 
/// Lowers the dependencies of a task into a single runtime array and returns
/// {number of elements, pointer to the array}.
///
/// Three classes of dependencies are merged: plain ones (statically counted),
/// iterator-expanded ones (count computed at runtime), and depobj ones
/// (copied from their own arrays). If only plain dependencies are present a
/// constant-sized array is used; otherwise a VLA sized at runtime is emitted.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // No dependency expressions at all -> nothing to emit.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically-known count: plain deps only (no depobj, no iterators).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Iteration space = product of all iterator upper bounds.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total element count is only known at runtime; build a VLA.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // VLA size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Only statically-counted deps: a plain constant-size local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Fill plain dependencies first, tracked by a compile-time counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4349 
/// Allocates and fills the dependency array backing an 'omp depobj'
/// construct, returning a pointer to its first real element.
///
/// One extra header record is allocated in front of the array; its base_addr
/// field stores the element count so that later depobj operations (update,
/// destroy, task deps) can recover the size (see getDepobjElements).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of all iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header record, then scale by the aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the allocation from a constant array type.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill real elements starting at index 1; iterator-expanded deps need an
  // in-memory position counter, plain ones a compile-time one.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the header, i.e. to the first real element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4436 
/// Emits the 'destroy' form of 'omp depobj': frees the runtime-allocated
/// dependency array via __kmpc_free.
///
/// The depobj variable points at the first real element; the allocation
/// actually starts one record earlier (the header), so the pointer is
/// adjusted by -1 before being passed to the runtime.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the stored element pointer from the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back to the start of the allocation (the header record).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
4463 
/// Emits the 'update' form of 'omp depobj': rewrites the flags field of every
/// kmp_depend_info record in the depobj's array to \p NewDepKind.
///
/// The loop over the array is emitted as a do-while with an explicit PHI, so
/// the body runs at least once (the array is assumed non-empty here —
/// emitDepobjDependClause never creates an empty one).
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Recover element count and first-element lvalue from the depobj header.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI tracks the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4512 
/// Emits code for an explicit task: allocates/initializes the task object,
/// lowers its dependencies, and dispatches to the appropriate runtime entry
/// point, honoring the 'if' clause.
///
/// With dependencies, the "then" path calls __kmpc_omp_task_with_deps,
/// otherwise __kmpc_omp_task. The "else" path (if clause false) waits on the
/// dependencies and runs the task body inline between
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "then" path: hand the task to the runtime for (possibly deferred)
  // execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  // "else" path: the 'if' clause is false, so run the task inline
  // (undeferred) after waiting for its dependencies.
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4632 
/// Emits the runtime call implementing a 'taskloop' directive: allocates and
/// initializes the task object via emitTaskInit, stores the loop bounds,
/// stride and reductions pointer into the kmp_task_t record, and calls
/// __kmpc_taskloop with the selected schedule (grainsize/num_tasks).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause value passed to the runtime: the evaluated condition when an
  // 'if' clause is present, otherwise the constant 1.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound field of the task record from the initializer
  // of the directive's lower-bound variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper-bound field likewise.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
4715 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// RedOpGen (used by the atomic-reduction generator in emitReduction).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the body entirely when the array is empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointer from one
  // iteration to the next.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation applies to a single element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4798 
4799 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4800 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4801 /// UDR combiner function.
4802 static void emitReductionCombiner(CodeGenFunction &CGF,
4803                                   const Expr *ReductionOp) {
4804   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4805     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4806       if (const auto *DRE =
4807               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4808         if (const auto *DRD =
4809                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4810           std::pair<llvm::Function *, llvm::Function *> Reduction =
4811               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4812           RValue Func = RValue::get(Reduction.first);
4813           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4814           CGF.EmitIgnoredExpr(ReductionOp);
4815           return;
4816         }
4817   CGF.EmitIgnoredExpr(ReductionOp);
4818 }
4819 
/// Emits the per-directive reduction function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* referencing the actual reduction
/// items; for each item the corresponding combiner in ReductionOps is emitted
/// with LHSExprs[i]/RHSExprs[i] remapped to the array elements. Array-typed
/// items are combined element-by-element.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the address stored in the corresponding
  // slot of the argument arrays. Idx tracks the array slot and may run ahead
  // of I because VLA items occupy an extra slot holding their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; ReductionOps, Privates, LHSExprs and RHSExprs are
  // iterated in lockstep.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
4909 
4910 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4911                                                   const Expr *ReductionOp,
4912                                                   const Expr *PrivateRef,
4913                                                   const DeclRefExpr *LHS,
4914                                                   const DeclRefExpr *RHS) {
4915   if (PrivateRef->getType()->isArrayType()) {
4916     // Emit reduction for array section.
4917     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4918     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4919     EmitOMPAggregateReduction(
4920         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4921         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4922           emitReductionCombiner(CGF, ReductionOp);
4923         });
4924   } else {
4925     // Emit reduction for array subscript or single variable.
4926     emitReductionCombiner(CGF, ReductionOp);
4927   }
4928 }
4929 
/// Emits code finalizing a reduction: either the plain combiners (simple
/// reduction), or the __kmpc_reduce{_nowait} protocol with a switch over the
/// runtime's answer — case 1 runs the tree-reduce combiners, case 2 the
/// atomic/critical fallback.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: emit each combiner inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // For each reduction op, try to recognize 'x = x <op> e' and emit it as a
  // simple atomic update; anything else falls back to a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Materialize the loaded value of X into a temporary mapped
                // to VD so UpExpr can be re-evaluated against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5233 
5234 /// Generates unique name for artificial threadprivate variables.
5235 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5236 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5237                                       const Expr *Ref) {
5238   SmallString<256> Buffer;
5239   llvm::raw_svector_ostream Out(Buffer);
5240   const clang::DeclRefExpr *DE;
5241   const VarDecl *D = ::getBaseDecl(Ref, DE);
5242   if (!D)
5243     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5244   D = D->getCanonicalDecl();
5245   std::string Name = CGM.getOpenMPRuntime().getName(
5246       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5247   Out << Prefix << Name << "_"
5248       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5249   return std::string(Out.str());
5250 }
5251 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points to the private copy of reduction item N.
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5317 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item described by \p RCG.
/// \param ReductionOp Combiner expression from the reduction clause.
/// \param LHS DeclRefExpr for the in/out (accumulator) variable.
/// \param RHS DeclRefExpr for the in (partial value) variable.
/// \param PrivateRef Reference expression for the private copy of the item.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Both runtime-facing parameters are opaque void* pointers.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5394 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item described by \p RCG.
/// \returns The finalizer function, or nullptr if the reduction item does not
/// require any cleanups (no function is emitted in that case).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single opaque void* parameter holding the address of the private copy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5442 
/// Emits the array of kmp_taskred_input_t descriptors for the reduction items
/// in \p Data and calls the matching runtime entry point, returning the
/// taskgroup reduction handle (or nullptr if there is nothing to do).
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer (no cleanups needed) is encoded as a null pointer.
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy (delayed) creation by the runtime.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
5567 
5568 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5569                                             SourceLocation Loc,
5570                                             bool IsWorksharingReduction) {
5571   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5572   // is_ws, int num, void *data);
5573   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5574   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5575                                                 CGM.IntTy, /*isSigned=*/true);
5576   llvm::Value *Args[] = {IdentTLoc, GTid,
5577                          llvm::ConstantInt::get(CGM.IntTy,
5578                                                 IsWorksharingReduction ? 1 : 0,
5579                                                 /*isSigned=*/true)};
5580   (void)CGF.EmitRuntimeCall(
5581       OMPBuilder.getOrCreateRuntimeFunction(
5582           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5583       Args);
5584 }
5585 
5586 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5587                                               SourceLocation Loc,
5588                                               ReductionCodeGen &RCG,
5589                                               unsigned N) {
5590   auto Sizes = RCG.getSizes(N);
5591   // Emit threadprivate global variable if the type is non-constant
5592   // (Sizes.second = nullptr).
5593   if (Sizes.second) {
5594     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5595                                                      /*isSigned=*/false);
5596     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5597         CGF, CGM.getContext().getSizeType(),
5598         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5599     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5600   }
5601 }
5602 
5603 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5604                                               SourceLocation Loc,
5605                                               llvm::Value *ReductionsPtr,
5606                                               LValue SharedLVal) {
5607   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5608   // *d);
5609   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5610                                                    CGM.IntTy,
5611                                                    /*isSigned=*/true),
5612                          ReductionsPtr,
5613                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5614                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5615   return Address(
5616       CGF.EmitRuntimeCall(
5617           OMPBuilder.getOrCreateRuntimeFunction(
5618               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5619           Args),
5620       CGF.Int8Ty, SharedLVal.getAlignment());
5621 }
5622 
/// Emits a taskwait: either via the OpenMPIRBuilder (only when there are no
/// dependences) or by calling the dependence-aware / plain runtime entry
/// points directly.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      // No noalias dependence list is passed: count 0 and a null pointer.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an untied task region, record a resume point after the taskwait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
5675 
5676 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5677                                            OpenMPDirectiveKind InnerKind,
5678                                            const RegionCodeGenTy &CodeGen,
5679                                            bool HasCancel) {
5680   if (!CGF.HaveInsertPoint())
5681     return;
5682   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5683                                  InnerKind != OMPD_critical &&
5684                                      InnerKind != OMPD_master &&
5685                                      InnerKind != OMPD_masked);
5686   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5687 }
5688 
namespace {
/// Cancellation-kind encoding passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points; the numeric
/// values are part of the runtime ABI and must not change.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
5698 
5699 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5700   RTCancelKind CancelKind = CancelNoreq;
5701   if (CancelRegion == OMPD_parallel)
5702     CancelKind = CancelParallel;
5703   else if (CancelRegion == OMPD_for)
5704     CancelKind = CancelLoop;
5705   else if (CancelRegion == OMPD_sections)
5706     CancelKind = CancelSections;
5707   else {
5708     assert(CancelRegion == OMPD_taskgroup);
5709     CancelKind = CancelTaskgroup;
5710   }
5711   return CancelKind;
5712 }
5713 
/// Emits code for an OpenMP 'cancellation point' directive: queries the
/// runtime and, if cancellation was requested, branches out of the enclosing
/// construct (with a cancel barrier first for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
5753 
/// Emits code for an OpenMP 'cancel' directive, optionally guarded by an 'if'
/// clause condition; on a positive runtime answer the construct is exited
/// (after a cancel barrier for parallel regions).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The 'then' branch of the optional 'if' clause; also emitted
    // unconditionally when there is no 'if' clause.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'else' branch is a no-op: cancel is simply not requested.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
5799 
5800 namespace {
5801 /// Cleanup action for uses_allocators support.
5802 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5803   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5804 
5805 public:
5806   OMPUsesAllocatorsActionTy(
5807       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5808       : Allocators(Allocators) {}
5809   void Enter(CodeGenFunction &CGF) override {
5810     if (!CGF.HaveInsertPoint())
5811       return;
5812     for (const auto &AllocatorData : Allocators) {
5813       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5814           CGF, AllocatorData.first, AllocatorData.second);
5815     }
5816   }
5817   void Exit(CodeGenFunction &CGF) override {
5818     if (!CGF.HaveInsertPoint())
5819       return;
5820     for (const auto &AllocatorData : Allocators) {
5821       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5822                                                         AllocatorData.first);
5823     }
5824   }
5825 };
5826 } // namespace
5827 
5828 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5829     const OMPExecutableDirective &D, StringRef ParentName,
5830     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5831     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5832   assert(!ParentName.empty() && "Invalid target entry parent name!");
5833   HasEmittedTargetRegion = true;
5834   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5835   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5836     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5837       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5838       if (!D.AllocatorTraits)
5839         continue;
5840       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5841     }
5842   }
5843   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5844   CodeGen.setAction(UsesAllocatorAction);
5845   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5846                                    IsOffloadEntry, CodeGen);
5847 }
5848 
/// Emits a call to __kmpc_init_allocator for one (allocator, traits) pair of a
/// uses_allocators clause and stores the returned handle into the allocator
/// variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void* for the runtime call.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable is emitted here as a local before being written.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the returned void* handle to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
5882 
5883 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5884                                              const Expr *Allocator) {
5885   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5886   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5887   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5888   llvm::Value *AllocatorVal =
5889       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5890   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5891                                           CGF.getContext().VoidPtrTy,
5892                                           Allocator->getExprLoc());
5893   (void)CGF.EmitRuntimeCall(
5894       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5895                                             OMPRTL___kmpc_destroy_allocator),
5896       {ThreadId, AllocatorVal});
5897 }
5898 
/// Computes min/max thread and team counts for a target directive by first
/// consulting the directive's num_teams/thread_limit style clauses and then
/// tightening the bounds with any ompx_attribute launch-bound attributes.
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // Per-attribute bounds; -1 means "no upper bound specified".
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      // Tighten: raise the minimums, and lower any positive maximums.
      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
5934 
/// Generates the outlined function for a target region via the
/// OpenMPIRBuilder and applies target attributes to the result.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  // Callback invoked by the IR builder to actually emit the body; it may not
  // be called if the region was emitted before (OutlinedFn stays null then).
  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  // Propagate AMDGPU waves-per-EU hints from ompx_attribute clauses.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}
5968 
5969 /// Checks if the expression is constant or does not have non-trivial function
5970 /// calls.
5971 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5972   // We can skip constant expressions.
5973   // We can skip expressions with trivial calls or simple expressions.
5974   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5975           !E->hasNonTrivialCall(Ctx)) &&
5976          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5977 }
5978 
/// Drills through compound statements and ignorable statements (trivial
/// expressions, asm/null statements, certain OpenMP directives, and pure
/// declarations) to find the single "interesting" child of \p Body; returns
/// nullptr if there is more than one such child.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declared entity is.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals do not count as real children.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the found child in case it is itself a compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6020 
// Returns the num_teams expression of the target-based directive \p D (or
// nullptr if there is none) and fills \p MinTeamsVal / \p MaxTeamsVal with a
// constant range when one can be determined: a constant clause value, 0 for
// "runtime decides", 1 when a single team is implied, and -1 when no teams
// region needs to be emitted at all.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', the team count is defined by the directive (if
    // any) nested directly inside the captured statement.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Fold the clause to a constant when possible so callers get a
          // tight [Min, Max] range.
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams directive without num_teams: runtime decides (0).
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      // Any other nested directive also implies a single team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives carry the clause themselves.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // target+parallel/simd combinations always run a single team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  // All non-target directive kinds are rejected by the assert above; reaching
  // here means an unexpected kind slipped through.
  llvm_unreachable("Unexpected directive kind.");
}
6152 
6153 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6154     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6155   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6156          "Clauses associated with the teams directive expected to be emitted "
6157          "only for the host!");
6158   CGBuilderTy &Bld = CGF.Builder;
6159   int32_t MinNT = -1, MaxNT = -1;
6160   const Expr *NumTeams =
6161       getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6162   if (NumTeams != nullptr) {
6163     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6164 
6165     switch (DirectiveKind) {
6166     case OMPD_target: {
6167       const auto *CS = D.getInnermostCapturedStmt();
6168       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6169       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6170       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6171                                                   /*IgnoreResultAssign*/ true);
6172       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6173                              /*isSigned=*/true);
6174     }
6175     case OMPD_target_teams:
6176     case OMPD_target_teams_distribute:
6177     case OMPD_target_teams_distribute_simd:
6178     case OMPD_target_teams_distribute_parallel_for:
6179     case OMPD_target_teams_distribute_parallel_for_simd: {
6180       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6181       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6182                                                   /*IgnoreResultAssign*/ true);
6183       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6184                              /*isSigned=*/true);
6185     }
6186     default:
6187       break;
6188     }
6189   }
6190 
6191   assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6192   return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6193 }
6194 
/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  // Only a directive nested directly inside the captured statement is
  // inspected; anything else leaves the outputs untouched.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      // Pick the if-clause that applies to 'parallel' (or has no modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs sequentially.
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          // Non-constant condition: emit it (including any pre-init
          // declarations the clause captured) and hand it back to the caller.
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          // NOTE(review): this condition looks inverted -- any non-zero
          // UpperBound (including the -1 "unset" sentinel) is overwritten,
          // while std::min only applies when UpperBound is already 0.
          // Presumably the intent was `UpperBound == -1 ? ... : std::min(...)`;
          // confirm before relying on the combined bound.
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      // Emit the clause's pre-init declarations and return the expression.
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  // A nested simd region is executed by a single thread.
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
6289 
// Determines the number-of-threads expression / constant upper bound for the
// target-based directive \p D, combining num_threads and thread_limit clauses
// from the directive itself and from directives nested inside it. When
// \p UpperBoundOnly is set, no expressions are evaluated or returned.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  // Suppress expression collection entirely when only the bound is wanted.
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  // Folds \p E into UpperBound when it is an integer constant and records the
  // expression in \p EPtr (if non-null).
  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      // NOTE(review): as in getNumThreads() above, this ternary looks
      // inverted -- a non-zero UpperBound (including the -1 sentinel) is
      // overwritten rather than min-combined; confirm the intended logic.
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  // Marks the region as sequential (one thread) and returns whatever
  // num_threads expression was collected so far.
  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          // Emit the pre-init declarations for the nested clause so its
          // expression can be evaluated in this (outer) context.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    // Descend through a nested non-distribute teams directive to find an
    // inner parallel/simd region contributing the num_threads value.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // A nested 'distribute' may itself wrap the parallel region; look inside.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined directives: the if/thread_limit/num_threads clauses all live
    // on \p D itself.
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: the parallel region runs with one thread.
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6449 
// Emits the 32-bit number-of-threads value for the target-based directive
// \p D, combining the num_threads expression, any thread_limit clause, and an
// optional if-clause condition as:
//   <cond> ? (<numthreads> ? <numthreads> : 0) : 1, capped by thread_limit.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  // -1 means no constant bound has been determined yet.
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression was already handled
    // above.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expression were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
6506 
6507 namespace {
6508 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6509 
6510 // Utility to handle information from clauses associated with a given
6511 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6512 // It provides a convenient interface to obtain the information and generate
6513 // code for that information.
6514 class MappableExprsHandler {
6515 public:
6516   /// Get the offset of the OMP_MAP_MEMBER_OF field.
6517   static unsigned getFlagMemberOffset() {
6518     unsigned Offset = 0;
6519     for (uint64_t Remain =
6520              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6521                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6522          !(Remain & 1); Remain = Remain >> 1)
6523       Offset++;
6524     return Offset;
6525   }
6526 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when the mapping has no associated clause
    /// expression (e.g. implicit maps).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
6543 
6544   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6545   using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6546   using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6547   using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6548   using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6549   using MapNonContiguousArrayTy =
6550       llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6551   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6552   using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6553 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    // Clang-side per-entry data kept parallel to the base class arrays.
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      // Append the Clang-side arrays first, then delegate to the base class
      // for the OpenMPIRBuilder-owned arrays.
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
6571 
  /// Map between a struct and its lowest & highest elements which have been
6573   /// mapped.
6574   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6575   ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map entries collected before the struct itself is processed.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    /// True if any of the mapped elements is an array section.
    bool IsArraySection = false;
    /// True if the whole record has been mapped.
    bool HasCompleteRecord = false;
  };
6587 
6588 private:
  /// Information collected for one mappable-expression component list:
  /// the components themselves plus the map type, modifiers, implicitness,
  /// associated user-defined mapper, and originating clause expression.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the mapping was generated implicitly rather than from an
    /// explicit clause.
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    /// The expression in the clause this entry came from, if any.
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6615 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression from the use_device_ptr/use_device_addr clause.
    const Expr *IE = nullptr;
    /// The declaration the clause refers to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6628 
6629   /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
6631   llvm::PointerUnion<const OMPExecutableDirective *,
6632                      const OMPDeclareMapperDecl *>
6633       CurDir;
6634 
6635   /// Function the directive is being generated for.
6636   CodeGenFunction &CGF;
6637 
6638   /// Set of all first private variables in the current directive.
6639   /// bool data is set to true if the variable is implicitly marked as
6640   /// firstprivate, false otherwise.
6641   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6642 
6643   /// Map between device pointer declarations and their expression components.
6644   /// The key value for declarations in 'this' is null.
6645   llvm::DenseMap<
6646       const ValueDecl *,
6647       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6648       DevPointersMap;
6649 
6650   /// Map between device addr declarations and their expression components.
6651   /// The key value for declarations in 'this' is null.
6652   llvm::DenseMap<
6653       const ValueDecl *,
6654       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6655       HasDevAddrsMap;
6656 
6657   /// Map between lambda declarations and their map type.
6658   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6659 
  /// Emit an IR value of the target's size type holding the number of bytes
  /// that the expression \p E covers for mapping purposes.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    // Size = sizeof(pointee) * dim0 * dim1 * ...; each dimension value is
    // converted to size_t and folded in with a no-unsigned-wrap multiply.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the base: pointee size for a pointer
      // base, element size for an array base.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element-size (length converted to
      // size_t, multiplied without unsigned wrap).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      // A select guards the subtraction so the result is 0 rather than a
      // wrapped value when lb*elemsize exceeds the total size.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    // Neither a shaping expression nor an array section: the static size of
    // the (non-reference) expression type.
    return CGF.getTypeSize(ExprTy);
  }
6734 
6735   /// Return the corresponding bits for a given map clause modifier. Add
6736   /// a flag marking the map as a pointer if requested. Add a flag marking the
6737   /// map as the first one of a series of maps that relate to the same map
6738   /// expression.
6739   OpenMPOffloadMappingFlags getMapTypeBits(
6740       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6741       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6742       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6743     OpenMPOffloadMappingFlags Bits =
6744         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6745                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6746     switch (MapType) {
6747     case OMPC_MAP_alloc:
6748     case OMPC_MAP_release:
6749       // alloc and release is the default behavior in the runtime library,  i.e.
6750       // if we don't pass any bits alloc/release that is what the runtime is
6751       // going to do. Therefore, we don't need to signal anything for these two
6752       // type modifiers.
6753       break;
6754     case OMPC_MAP_to:
6755       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6756       break;
6757     case OMPC_MAP_from:
6758       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6759       break;
6760     case OMPC_MAP_tofrom:
6761       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6762               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6763       break;
6764     case OMPC_MAP_delete:
6765       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6766       break;
6767     case OMPC_MAP_unknown:
6768       llvm_unreachable("Unexpected map type!");
6769     }
6770     if (AddPtrFlag)
6771       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6772     if (AddIsTargetParamFlag)
6773       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6774     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6775       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6776     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6777       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6778     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6779         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6780       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6781     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6782       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6783     if (IsNonContiguous)
6784       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6785     return Bits;
6786   }
6787 
6788   /// Return true if the provided expression is a final array section. A
6789   /// final array section, is one whose length can't be proved to be one.
6790   bool isFinalArraySectionExpression(const Expr *E) const {
6791     const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6792 
6793     // It is not an array section and therefore not a unity-size one.
6794     if (!OASE)
6795       return false;
6796 
6797     // An array section with no colon always refer to a single element.
6798     if (OASE->getColonLocFirst().isInvalid())
6799       return false;
6800 
6801     const Expr *Length = OASE->getLength();
6802 
6803     // If we don't have a length we have to check if the array has size 1
6804     // for this dimension. Also, we should always expect a length if the
6805     // base type is pointer.
6806     if (!Length) {
6807       QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6808                              OASE->getBase()->IgnoreParenImpCasts())
6809                              .getCanonicalType();
6810       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6811         return ATy->getSExtSize() != 1;
6812       // If we don't have a constant dimension length, we have to consider
6813       // the current section as having any size, so it is not necessarily
6814       // unitary. If it happen to be unity size, that's user fault.
6815       return true;
6816     }
6817 
6818     // Check if the length evaluates to 1.
6819     Expr::EvalResult Result;
6820     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6821       return true; // Can have more that size 1.
6822 
6823     llvm::APSInt ConstLength = Result.Val.getInt();
6824     return ConstLength.getSExtValue() != 1;
6825   }
6826 
6827   /// Generate the base pointers, section pointers, sizes, map type bits, and
6828   /// user-defined mappers (all included in \a CombinedInfo) for the provided
6829   /// map type, map or motion modifiers, and expression components.
6830   /// \a IsFirstComponent should be set to true if the provided set of
6831   /// components is the first associated with a capture.
6832   void generateInfoForComponentList(
6833       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6834       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6835       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6836       MapCombinedInfoTy &CombinedInfo,
6837       MapCombinedInfoTy &StructBaseCombinedInfo,
6838       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6839       bool IsImplicit, bool GenerateAllInfoForClauses,
6840       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6841       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6842       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6843           OverlappedElements = std::nullopt,
6844       bool AreBothBasePtrAndPteeMapped = false) const {
6845     // The following summarizes what has to be generated for each map and the
6846     // types below. The generated information is expressed in this order:
6847     // base pointer, section pointer, size, flags
6848     // (to add to the ones that come from the map type and modifier).
6849     //
6850     // double d;
6851     // int i[100];
6852     // float *p;
6853     // int **a = &i;
6854     //
6855     // struct S1 {
6856     //   int i;
6857     //   float f[50];
6858     // }
6859     // struct S2 {
6860     //   int i;
6861     //   float f[50];
6862     //   S1 s;
6863     //   double *p;
6864     //   struct S2 *ps;
6865     //   int &ref;
6866     // }
6867     // S2 s;
6868     // S2 *ps;
6869     //
6870     // map(d)
6871     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6872     //
6873     // map(i)
6874     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6875     //
6876     // map(i[1:23])
6877     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6878     //
6879     // map(p)
6880     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6881     //
6882     // map(p[1:24])
6883     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6884     // in unified shared memory mode or for local pointers
6885     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6886     //
6887     // map((*a)[0:3])
6888     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6889     // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6890     //
6891     // map(**a)
6892     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6893     // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6894     //
6895     // map(s)
6896     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6897     //
6898     // map(s.i)
6899     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6900     //
6901     // map(s.s.f)
6902     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6903     //
6904     // map(s.p)
6905     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6906     //
6907     // map(to: s.p[:22])
6908     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6909     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6910     // &(s.p), &(s.p[0]), 22*sizeof(double),
6911     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6912     // (*) alloc space for struct members, only this is a target parameter
6913     // (**) map the pointer (nothing to be mapped in this example) (the compiler
6914     //      optimizes this entry out, same in the examples below)
6915     // (***) map the pointee (map: to)
6916     //
6917     // map(to: s.ref)
6918     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6919     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6920     // (*) alloc space for struct members, only this is a target parameter
6921     // (**) map the pointer (nothing to be mapped in this example) (the compiler
6922     //      optimizes this entry out, same in the examples below)
6923     // (***) map the pointee (map: to)
6924     //
6925     // map(s.ps)
6926     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6927     //
6928     // map(from: s.ps->s.i)
6929     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6930     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6931     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
6932     //
6933     // map(to: s.ps->ps)
6934     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6935     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6936     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
6937     //
6938     // map(s.ps->ps->ps)
6939     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6940     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6941     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6942     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6943     //
6944     // map(to: s.ps->ps->s.f[:22])
6945     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6946     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6947     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6948     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6949     //
6950     // map(ps)
6951     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6952     //
6953     // map(ps->i)
6954     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6955     //
6956     // map(ps->s.f)
6957     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6958     //
6959     // map(from: ps->p)
6960     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6961     //
6962     // map(to: ps->p[:22])
6963     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6964     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6965     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6966     //
6967     // map(ps->ps)
6968     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6969     //
6970     // map(from: ps->ps->s.i)
6971     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6972     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6973     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6974     //
6975     // map(from: ps->ps->ps)
6976     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6977     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6978     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6979     //
6980     // map(ps->ps->ps->ps)
6981     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6982     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6983     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6984     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6985     //
6986     // map(to: ps->ps->ps->s.f[:22])
6987     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6988     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6989     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6990     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6991     //
6992     // map(to: s.f[:22]) map(from: s.p[:33])
6993     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6994     //     sizeof(double*) (**), TARGET_PARAM
6995     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6996     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6997     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6998     // (*) allocate contiguous space needed to fit all mapped members even if
6999     //     we allocate space for members not mapped (in this example,
7000     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7001     //     them as well because they fall between &s.f[0] and &s.p)
7002     //
7003     // map(from: s.f[:22]) map(to: ps->p[:33])
7004     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7005     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7006     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7007     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7008     // (*) the struct this entry pertains to is the 2nd element in the list of
7009     //     arguments, hence MEMBER_OF(2)
7010     //
7011     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7012     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7013     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7014     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7015     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7016     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7017     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7018     // (*) the struct this entry pertains to is the 4th element in the list
7019     //     of arguments, hence MEMBER_OF(4)
7020     //
7021     // map(p, p[:100])
7022     // ===> map(p[:100])
7023     // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7024 
7025     // Track if the map information being generated is the first for a capture.
7026     bool IsCaptureFirstInfo = IsFirstComponentList;
7027     // When the variable is on a declare target link or in a to clause with
7028     // unified memory, a reference is needed to hold the host/device address
7029     // of the variable.
7030     bool RequiresReference = false;
7031 
7032     // Scan the components from the base to the complete expression.
7033     auto CI = Components.rbegin();
7034     auto CE = Components.rend();
7035     auto I = CI;
7036 
7037     // Track if the map information being generated is the first for a list of
7038     // components.
7039     bool IsExpressionFirstInfo = true;
7040     bool FirstPointerInComplexData = false;
7041     Address BP = Address::invalid();
7042     const Expr *AssocExpr = I->getAssociatedExpression();
7043     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7044     const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7045     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7046 
7047     if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7048       return;
7049     if (isa<MemberExpr>(AssocExpr)) {
7050       // The base is the 'this' pointer. The content of the pointer is going
7051       // to be the base of the field being mapped.
7052       BP = CGF.LoadCXXThisAddress();
7053     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7054                (OASE &&
7055                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7056       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7057     } else if (OAShE &&
7058                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7059       BP = Address(
7060           CGF.EmitScalarExpr(OAShE->getBase()),
7061           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7062           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7063     } else {
7064       // The base is the reference to the variable.
7065       // BP = &Var.
7066       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7067       if (const auto *VD =
7068               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7069         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7070                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7071           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7072               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7073                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7074                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7075             RequiresReference = true;
7076             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7077           }
7078         }
7079       }
7080 
7081       // If the variable is a pointer and is being dereferenced (i.e. is not
7082       // the last component), the base has to be the pointer itself, not its
7083       // reference. References are ignored for mapping purposes.
7084       QualType Ty =
7085           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7086       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7087         // No need to generate individual map information for the pointer, it
7088         // can be associated with the combined storage if shared memory mode is
7089         // active or the base declaration is not global variable.
7090         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7091         if (!AreBothBasePtrAndPteeMapped &&
7092             (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7093              !VD || VD->hasLocalStorage()))
7094           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7095         else
7096           FirstPointerInComplexData = true;
7097         ++I;
7098       }
7099     }
7100 
7101     // Track whether a component of the list should be marked as MEMBER_OF some
7102     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7103     // in a component list should be marked as MEMBER_OF, all subsequent entries
7104     // do not belong to the base struct. E.g.
7105     // struct S2 s;
7106     // s.ps->ps->ps->f[:]
7107     //   (1) (2) (3) (4)
7108     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7109     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7110     // is the pointee of ps(2) which is not member of struct s, so it should not
7111     // be marked as such (it is still PTR_AND_OBJ).
7112     // The variable is initialized to false so that PTR_AND_OBJ entries which
7113     // are not struct members are not considered (e.g. array of pointers to
7114     // data).
7115     bool ShouldBeMemberOf = false;
7116 
7117     // Variable keeping track of whether or not we have encountered a component
7118     // in the component list which is a member expression. Useful when we have a
7119     // pointer or a final array section, in which case it is the previous
7120     // component in the list which tells us whether we have a member expression.
7121     // E.g. X.f[:]
7122     // While processing the final array section "[:]" it is "f" which tells us
7123     // whether we are dealing with a member of a declared struct.
7124     const MemberExpr *EncounteredME = nullptr;
7125 
7126     // Track for the total number of dimension. Start from one for the dummy
7127     // dimension.
7128     uint64_t DimSize = 1;
7129 
7130     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7131     bool IsPrevMemberReference = false;
7132 
7133     // We need to check if we will be encountering any MEs. If we do not
7134     // encounter any ME expression it means we will be mapping the whole struct.
7135     // In that case we need to skip adding an entry for the struct to the
7136     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7137     // list only when generating all info for clauses.
7138     bool IsMappingWholeStruct = true;
7139     if (!GenerateAllInfoForClauses) {
7140       IsMappingWholeStruct = false;
7141     } else {
7142       for (auto TempI = I; TempI != CE; ++TempI) {
7143         const MemberExpr *PossibleME =
7144             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7145         if (PossibleME) {
7146           IsMappingWholeStruct = false;
7147           break;
7148         }
7149       }
7150     }
7151 
7152     for (; I != CE; ++I) {
7153       // If the current component is member of a struct (parent struct) mark it.
7154       if (!EncounteredME) {
7155         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7156         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7157         // as MEMBER_OF the parent struct.
7158         if (EncounteredME) {
7159           ShouldBeMemberOf = true;
7160           // Do not emit as complex pointer if this is actually not array-like
7161           // expression.
7162           if (FirstPointerInComplexData) {
7163             QualType Ty = std::prev(I)
7164                               ->getAssociatedDeclaration()
7165                               ->getType()
7166                               .getNonReferenceType();
7167             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7168             FirstPointerInComplexData = false;
7169           }
7170         }
7171       }
7172 
7173       auto Next = std::next(I);
7174 
7175       // We need to generate the addresses and sizes if this is the last
7176       // component, if the component is a pointer or if it is an array section
7177       // whose length can't be proved to be one. If this is a pointer, it
7178       // becomes the base address for the following components.
7179 
7180       // A final array section, is one whose length can't be proved to be one.
7181       // If the map item is non-contiguous then we don't treat any array section
7182       // as final array section.
7183       bool IsFinalArraySection =
7184           !IsNonContiguous &&
7185           isFinalArraySectionExpression(I->getAssociatedExpression());
7186 
7187       // If we have a declaration for the mapping use that, otherwise use
7188       // the base declaration of the map clause.
7189       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7190                                      ? I->getAssociatedDeclaration()
7191                                      : BaseDecl;
7192       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7193                                                : MapExpr;
7194 
7195       // Get information on whether the element is a pointer. Have to do a
7196       // special treatment for array sections given that they are built-in
7197       // types.
7198       const auto *OASE =
7199           dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7200       const auto *OAShE =
7201           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7202       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7203       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7204       bool IsPointer =
7205           OAShE ||
7206           (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7207                        .getCanonicalType()
7208                        ->isAnyPointerType()) ||
7209           I->getAssociatedExpression()->getType()->isAnyPointerType();
7210       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7211                                MapDecl &&
7212                                MapDecl->getType()->isLValueReferenceType();
7213       bool IsNonDerefPointer = IsPointer &&
7214                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7215                                !IsNonContiguous;
7216 
7217       if (OASE)
7218         ++DimSize;
7219 
7220       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7221           IsFinalArraySection) {
7222         // If this is not the last component, we expect the pointer to be
7223         // associated with an array expression or member expression.
7224         assert((Next == CE ||
7225                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7226                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7227                 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7228                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7229                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7230                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7231                "Unexpected expression");
7232 
7233         Address LB = Address::invalid();
7234         Address LowestElem = Address::invalid();
7235         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7236                                        const MemberExpr *E) {
7237           const Expr *BaseExpr = E->getBase();
7238           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7239           // scalar.
7240           LValue BaseLV;
7241           if (E->isArrow()) {
7242             LValueBaseInfo BaseInfo;
7243             TBAAAccessInfo TBAAInfo;
7244             Address Addr =
7245                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7246             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7247             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7248           } else {
7249             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7250           }
7251           return BaseLV;
7252         };
7253         if (OAShE) {
7254           LowestElem = LB =
7255               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7256                       CGF.ConvertTypeForMem(
7257                           OAShE->getBase()->getType()->getPointeeType()),
7258                       CGF.getContext().getTypeAlignInChars(
7259                           OAShE->getBase()->getType()));
7260         } else if (IsMemberReference) {
7261           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7262           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7263           LowestElem = CGF.EmitLValueForFieldInitialization(
7264                               BaseLVal, cast<FieldDecl>(MapDecl))
7265                            .getAddress();
7266           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7267                    .getAddress();
7268         } else {
7269           LowestElem = LB =
7270               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7271                   .getAddress();
7272         }
7273 
7274         // If this component is a pointer inside the base struct then we don't
7275         // need to create any entry for it - it will be combined with the object
7276         // it is pointing to into a single PTR_AND_OBJ entry.
7277         bool IsMemberPointerOrAddr =
7278             EncounteredME &&
7279             (((IsPointer || ForDeviceAddr) &&
7280               I->getAssociatedExpression() == EncounteredME) ||
7281              (IsPrevMemberReference && !IsPointer) ||
7282              (IsMemberReference && Next != CE &&
7283               !Next->getAssociatedExpression()->getType()->isPointerType()));
7284         if (!OverlappedElements.empty() && Next == CE) {
7285           // Handle base element with the info for overlapped elements.
7286           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7287           assert(!IsPointer &&
7288                  "Unexpected base element with the pointer type.");
7289           // Mark the whole struct as the struct that requires allocation on the
7290           // device.
7291           PartialStruct.LowestElem = {0, LowestElem};
7292           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7293               I->getAssociatedExpression()->getType());
7294           Address HB = CGF.Builder.CreateConstGEP(
7295               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7296                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7297               TypeSize.getQuantity() - 1);
7298           PartialStruct.HighestElem = {
7299               std::numeric_limits<decltype(
7300                   PartialStruct.HighestElem.first)>::max(),
7301               HB};
7302           PartialStruct.Base = BP;
7303           PartialStruct.LB = LB;
7304           assert(
7305               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7306               "Overlapped elements must be used only once for the variable.");
7307           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7308           // Emit data for non-overlapped data.
7309           OpenMPOffloadMappingFlags Flags =
7310               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7311               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7312                              /*AddPtrFlag=*/false,
7313                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7314           llvm::Value *Size = nullptr;
7315           // Do bitcopy of all non-overlapped structure elements.
7316           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7317                    Component : OverlappedElements) {
7318             Address ComponentLB = Address::invalid();
7319             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7320                  Component) {
7321               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7322                 const auto *FD = dyn_cast<FieldDecl>(VD);
7323                 if (FD && FD->getType()->isLValueReferenceType()) {
7324                   const auto *ME =
7325                       cast<MemberExpr>(MC.getAssociatedExpression());
7326                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7327                   ComponentLB =
7328                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7329                           .getAddress();
7330                 } else {
7331                   ComponentLB =
7332                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7333                           .getAddress();
7334                 }
7335                 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7336                 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7337                 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7338                                                  LBPtr);
7339                 break;
7340               }
7341             }
7342             assert(Size && "Failed to determine structure size");
7343             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7344             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7345             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7346             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7347             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7348             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7349                 Size, CGF.Int64Ty, /*isSigned=*/true));
7350             CombinedInfo.Types.push_back(Flags);
7351             CombinedInfo.Mappers.push_back(nullptr);
7352             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7353                                                                       : 1);
7354             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7355           }
7356           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7357           CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7358           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7359           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7360           CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7361           llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7362           Size = CGF.Builder.CreatePtrDiff(
7363               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7364               LBPtr);
7365           CombinedInfo.Sizes.push_back(
7366               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7367           CombinedInfo.Types.push_back(Flags);
7368           CombinedInfo.Mappers.push_back(nullptr);
7369           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7370                                                                     : 1);
7371           break;
7372         }
7373         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7374         // Skip adding an entry in the CurInfo of this combined entry if the
7375         // whole struct is currently being mapped. The struct needs to be added
7376         // in the first position before any data internal to the struct is being
7377         // mapped.
7378         if (!IsMemberPointerOrAddr ||
7379             (Next == CE && MapType != OMPC_MAP_unknown)) {
7380           if (!IsMappingWholeStruct) {
7381             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7382             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7383             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7384             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7385             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7386             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7387                 Size, CGF.Int64Ty, /*isSigned=*/true));
7388             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7389                                                                       : 1);
7390           } else {
7391             StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7392             StructBaseCombinedInfo.BasePointers.push_back(
7393                 BP.emitRawPointer(CGF));
7394             StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7395             StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7396             StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7397             StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7398                 Size, CGF.Int64Ty, /*isSigned=*/true));
7399             StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7400                 IsNonContiguous ? DimSize : 1);
7401           }
7402 
7403           // If Mapper is valid, the last component inherits the mapper.
7404           bool HasMapper = Mapper && Next == CE;
7405           if (!IsMappingWholeStruct)
7406             CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7407           else
7408             StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7409                                                                : nullptr);
7410 
7411           // We need to add a pointer flag for each map that comes from the
7412           // same expression except for the first one. We also need to signal
7413           // this map is the first one that relates with the current capture
7414           // (there is a set of entries for each capture).
7415           OpenMPOffloadMappingFlags Flags =
7416               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7417                              !IsExpressionFirstInfo || RequiresReference ||
7418                                  FirstPointerInComplexData || IsMemberReference,
7419                              AreBothBasePtrAndPteeMapped ||
7420                                  (IsCaptureFirstInfo && !RequiresReference),
7421                              IsNonContiguous);
7422 
7423           if (!IsExpressionFirstInfo || IsMemberReference) {
7424             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7425             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7426             if (IsPointer || (IsMemberReference && Next != CE))
7427               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7428                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7429                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7430                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7431                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7432 
7433             if (ShouldBeMemberOf) {
7434               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7435               // should be later updated with the correct value of MEMBER_OF.
7436               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7437               // From now on, all subsequent PTR_AND_OBJ entries should not be
7438               // marked as MEMBER_OF.
7439               ShouldBeMemberOf = false;
7440             }
7441           }
7442 
7443           if (!IsMappingWholeStruct)
7444             CombinedInfo.Types.push_back(Flags);
7445           else
7446             StructBaseCombinedInfo.Types.push_back(Flags);
7447         }
7448 
7449         // If we have encountered a member expression so far, keep track of the
7450         // mapped member. If the parent is "*this", then the value declaration
7451         // is nullptr.
7452         if (EncounteredME) {
7453           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7454           unsigned FieldIndex = FD->getFieldIndex();
7455 
7456           // Update info about the lowest and highest elements for this struct
7457           if (!PartialStruct.Base.isValid()) {
7458             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7459             if (IsFinalArraySection) {
7460               Address HB =
7461                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7462                       .getAddress();
7463               PartialStruct.HighestElem = {FieldIndex, HB};
7464             } else {
7465               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7466             }
7467             PartialStruct.Base = BP;
7468             PartialStruct.LB = BP;
7469           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7470             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7471           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7472             if (IsFinalArraySection) {
7473               Address HB =
7474                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7475                       .getAddress();
7476               PartialStruct.HighestElem = {FieldIndex, HB};
7477             } else {
7478               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7479             }
7480           }
7481         }
7482 
7483         // Need to emit combined struct for array sections.
7484         if (IsFinalArraySection || IsNonContiguous)
7485           PartialStruct.IsArraySection = true;
7486 
7487         // If we have a final array section, we are done with this expression.
7488         if (IsFinalArraySection)
7489           break;
7490 
7491         // The pointer becomes the base for the next element.
7492         if (Next != CE)
7493           BP = IsMemberReference ? LowestElem : LB;
7494 
7495         IsExpressionFirstInfo = false;
7496         IsCaptureFirstInfo = false;
7497         FirstPointerInComplexData = false;
7498         IsPrevMemberReference = IsMemberReference;
7499       } else if (FirstPointerInComplexData) {
7500         QualType Ty = Components.rbegin()
7501                           ->getAssociatedDeclaration()
7502                           ->getType()
7503                           .getNonReferenceType();
7504         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7505         FirstPointerInComplexData = false;
7506       }
7507     }
7508     // If ran into the whole component - allocate the space for the whole
7509     // record.
7510     if (!EncounteredME)
7511       PartialStruct.HasCompleteRecord = true;
7512 
7513     if (!IsNonContiguous)
7514       return;
7515 
7516     const ASTContext &Context = CGF.getContext();
7517 
7518     // For supporting stride in array section, we need to initialize the first
7519     // dimension size as 1, first offset as 0, and first count as 1
7520     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7521     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7522     MapValuesArrayTy CurStrides;
7523     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7524     uint64_t ElementTypeSize;
7525 
7526     // Collect Size information for each dimension and get the element size as
7527     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7528     // should be [10, 10] and the first stride is 4 btyes.
7529     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7530          Components) {
7531       const Expr *AssocExpr = Component.getAssociatedExpression();
7532       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7533 
7534       if (!OASE)
7535         continue;
7536 
7537       QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7538       auto *CAT = Context.getAsConstantArrayType(Ty);
7539       auto *VAT = Context.getAsVariableArrayType(Ty);
7540 
7541       // We need all the dimension size except for the last dimension.
7542       assert((VAT || CAT || &Component == &*Components.begin()) &&
7543              "Should be either ConstantArray or VariableArray if not the "
7544              "first Component");
7545 
7546       // Get element size if CurStrides is empty.
7547       if (CurStrides.empty()) {
7548         const Type *ElementType = nullptr;
7549         if (CAT)
7550           ElementType = CAT->getElementType().getTypePtr();
7551         else if (VAT)
7552           ElementType = VAT->getElementType().getTypePtr();
7553         else
7554           assert(&Component == &*Components.begin() &&
7555                  "Only expect pointer (non CAT or VAT) when this is the "
7556                  "first Component");
7557         // If ElementType is null, then it means the base is a pointer
7558         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7559         // for next iteration.
7560         if (ElementType) {
7561           // For the case that having pointer as base, we need to remove one
7562           // level of indirection.
7563           if (&Component != &*Components.begin())
7564             ElementType = ElementType->getPointeeOrArrayElementType();
7565           ElementTypeSize =
7566               Context.getTypeSizeInChars(ElementType).getQuantity();
7567           CurStrides.push_back(
7568               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7569         }
7570       }
7571       // Get dimension value except for the last dimension since we don't need
7572       // it.
7573       if (DimSizes.size() < Components.size() - 1) {
7574         if (CAT)
7575           DimSizes.push_back(
7576               llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7577         else if (VAT)
7578           DimSizes.push_back(CGF.Builder.CreateIntCast(
7579               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7580               /*IsSigned=*/false));
7581       }
7582     }
7583 
7584     // Skip the dummy dimension since we have already have its information.
7585     auto *DI = DimSizes.begin() + 1;
7586     // Product of dimension.
7587     llvm::Value *DimProd =
7588         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7589 
7590     // Collect info for non-contiguous. Notice that offset, count, and stride
7591     // are only meaningful for array-section, so we insert a null for anything
7592     // other than array-section.
7593     // Also, the size of offset, count, and stride are not the same as
7594     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7595     // count, and stride are the same as the number of non-contiguous
7596     // declaration in target update to/from clause.
7597     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7598          Components) {
7599       const Expr *AssocExpr = Component.getAssociatedExpression();
7600 
7601       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7602         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7603             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7604             /*isSigned=*/false);
7605         CurOffsets.push_back(Offset);
7606         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7607         CurStrides.push_back(CurStrides.back());
7608         continue;
7609       }
7610 
7611       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7612 
7613       if (!OASE)
7614         continue;
7615 
7616       // Offset
7617       const Expr *OffsetExpr = OASE->getLowerBound();
7618       llvm::Value *Offset = nullptr;
7619       if (!OffsetExpr) {
7620         // If offset is absent, then we just set it to zero.
7621         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7622       } else {
7623         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7624                                            CGF.Int64Ty,
7625                                            /*isSigned=*/false);
7626       }
7627       CurOffsets.push_back(Offset);
7628 
7629       // Count
7630       const Expr *CountExpr = OASE->getLength();
7631       llvm::Value *Count = nullptr;
7632       if (!CountExpr) {
7633         // In Clang, once a high dimension is an array section, we construct all
7634         // the lower dimension as array section, however, for case like
7635         // arr[0:2][2], Clang construct the inner dimension as an array section
7636         // but it actually is not in an array section form according to spec.
7637         if (!OASE->getColonLocFirst().isValid() &&
7638             !OASE->getColonLocSecond().isValid()) {
7639           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7640         } else {
7641           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7642           // When the length is absent it defaults to ⌈(size −
7643           // lower-bound)/stride⌉, where size is the size of the array
7644           // dimension.
7645           const Expr *StrideExpr = OASE->getStride();
7646           llvm::Value *Stride =
7647               StrideExpr
7648                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7649                                               CGF.Int64Ty, /*isSigned=*/false)
7650                   : nullptr;
7651           if (Stride)
7652             Count = CGF.Builder.CreateUDiv(
7653                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7654           else
7655             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7656         }
7657       } else {
7658         Count = CGF.EmitScalarExpr(CountExpr);
7659       }
7660       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7661       CurCounts.push_back(Count);
7662 
7663       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7664       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7665       //              Offset      Count     Stride
7666       //    D0          0           1         4    (int)    <- dummy dimension
7667       //    D1          0           2         8    (2 * (1) * 4)
7668       //    D2          1           2         20   (1 * (1 * 5) * 4)
7669       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7670       const Expr *StrideExpr = OASE->getStride();
7671       llvm::Value *Stride =
7672           StrideExpr
7673               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7674                                           CGF.Int64Ty, /*isSigned=*/false)
7675               : nullptr;
7676       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7677       if (Stride)
7678         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7679       else
7680         CurStrides.push_back(DimProd);
7681       if (DI != DimSizes.end())
7682         ++DI;
7683     }
7684 
7685     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7686     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7687     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7688   }
7689 
7690   /// Return the adjusted map modifiers if the declaration a capture refers to
7691   /// appears in a first-private clause. This is expected to be used only with
7692   /// directives that start with 'target'.
7693   OpenMPOffloadMappingFlags
7694   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7695     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7696 
7697     // A first private variable captured by reference will use only the
7698     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7699     // declaration is known as first-private in this handler.
7700     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7701       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7702         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7703                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7704       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7705              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7706     }
7707     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7708     if (I != LambdasMap.end())
7709       // for map(to: lambda): using user specified map type.
7710       return getMapTypeBits(
7711           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7712           /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7713           /*AddPtrFlag=*/false,
7714           /*AddIsTargetParamFlag=*/false,
7715           /*isNonContiguous=*/false);
7716     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7717            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7718   }
7719 
7720   void getPlainLayout(const CXXRecordDecl *RD,
7721                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7722                       bool AsBase) const {
7723     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7724 
7725     llvm::StructType *St =
7726         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7727 
7728     unsigned NumElements = St->getNumElements();
7729     llvm::SmallVector<
7730         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7731         RecordLayout(NumElements);
7732 
7733     // Fill bases.
7734     for (const auto &I : RD->bases()) {
7735       if (I.isVirtual())
7736         continue;
7737 
7738       QualType BaseTy = I.getType();
7739       const auto *Base = BaseTy->getAsCXXRecordDecl();
7740       // Ignore empty bases.
7741       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7742           CGF.getContext()
7743               .getASTRecordLayout(Base)
7744               .getNonVirtualSize()
7745               .isZero())
7746         continue;
7747 
7748       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7749       RecordLayout[FieldIndex] = Base;
7750     }
7751     // Fill in virtual bases.
7752     for (const auto &I : RD->vbases()) {
7753       QualType BaseTy = I.getType();
7754       // Ignore empty bases.
7755       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7756         continue;
7757 
7758       const auto *Base = BaseTy->getAsCXXRecordDecl();
7759       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7760       if (RecordLayout[FieldIndex])
7761         continue;
7762       RecordLayout[FieldIndex] = Base;
7763     }
7764     // Fill in all the fields.
7765     assert(!RD->isUnion() && "Unexpected union.");
7766     for (const auto *Field : RD->fields()) {
7767       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7768       // will fill in later.)
7769       if (!Field->isBitField() &&
7770           !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7771         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7772         RecordLayout[FieldIndex] = Field;
7773       }
7774     }
7775     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7776              &Data : RecordLayout) {
7777       if (Data.isNull())
7778         continue;
7779       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7780         getPlainLayout(Base, Layout, /*AsBase=*/true);
7781       else
7782         Layout.push_back(Data.get<const FieldDecl *>());
7783     }
7784   }
7785 
7786   /// Generate all the base pointers, section pointers, sizes, map types, and
7787   /// mappers for the extracted mappable expressions (all included in \a
7788   /// CombinedInfo). Also, for each item that relates with a device pointer, a
7789   /// pair of the relevant declaration and index where it occurs is appended to
7790   /// the device pointers info array.
7791   void generateAllInfoForClauses(
7792       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7793       llvm::OpenMPIRBuilder &OMPBuilder,
7794       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7795           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7796     // We have to process the component lists that relate with the same
7797     // declaration in a single chunk so that we can generate the map flags
7798     // correctly. Therefore, we organize all lists in a map.
7799     enum MapKind { Present, Allocs, Other, Total };
7800     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7801                     SmallVector<SmallVector<MapInfo, 8>, 4>>
7802         Info;
7803 
7804     // Helper function to fill the information map for the different supported
7805     // clauses.
7806     auto &&InfoGen =
7807         [&Info, &SkipVarSet](
7808             const ValueDecl *D, MapKind Kind,
7809             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7810             OpenMPMapClauseKind MapType,
7811             ArrayRef<OpenMPMapModifierKind> MapModifiers,
7812             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7813             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7814             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7815           if (SkipVarSet.contains(D))
7816             return;
7817           auto It = Info.find(D);
7818           if (It == Info.end())
7819             It = Info
7820                      .insert(std::make_pair(
7821                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7822                      .first;
7823           It->second[Kind].emplace_back(
7824               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7825               IsImplicit, Mapper, VarRef, ForDeviceAddr);
7826         };
7827 
7828     for (const auto *Cl : Clauses) {
7829       const auto *C = dyn_cast<OMPMapClause>(Cl);
7830       if (!C)
7831         continue;
7832       MapKind Kind = Other;
7833       if (llvm::is_contained(C->getMapTypeModifiers(),
7834                              OMPC_MAP_MODIFIER_present))
7835         Kind = Present;
7836       else if (C->getMapType() == OMPC_MAP_alloc)
7837         Kind = Allocs;
7838       const auto *EI = C->getVarRefs().begin();
7839       for (const auto L : C->component_lists()) {
7840         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7841         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7842                 C->getMapTypeModifiers(), std::nullopt,
7843                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7844                 E);
7845         ++EI;
7846       }
7847     }
7848     for (const auto *Cl : Clauses) {
7849       const auto *C = dyn_cast<OMPToClause>(Cl);
7850       if (!C)
7851         continue;
7852       MapKind Kind = Other;
7853       if (llvm::is_contained(C->getMotionModifiers(),
7854                              OMPC_MOTION_MODIFIER_present))
7855         Kind = Present;
7856       const auto *EI = C->getVarRefs().begin();
7857       for (const auto L : C->component_lists()) {
7858         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7859                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7860                 C->isImplicit(), std::get<2>(L), *EI);
7861         ++EI;
7862       }
7863     }
7864     for (const auto *Cl : Clauses) {
7865       const auto *C = dyn_cast<OMPFromClause>(Cl);
7866       if (!C)
7867         continue;
7868       MapKind Kind = Other;
7869       if (llvm::is_contained(C->getMotionModifiers(),
7870                              OMPC_MOTION_MODIFIER_present))
7871         Kind = Present;
7872       const auto *EI = C->getVarRefs().begin();
7873       for (const auto L : C->component_lists()) {
7874         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7875                 std::nullopt, C->getMotionModifiers(),
7876                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7877                 *EI);
7878         ++EI;
7879       }
7880     }
7881 
7882     // Look at the use_device_ptr and use_device_addr clauses information and
7883     // mark the existing map entries as such. If there is no map information for
7884     // an entry in the use_device_ptr and use_device_addr list, we create one
7885     // with map type 'alloc' and zero size section. It is the user fault if that
7886     // was not mapped before. If there is no map information and the pointer is
7887     // a struct member, then we defer the emission of that entry until the whole
7888     // struct has been processed.
7889     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7890                     SmallVector<DeferredDevicePtrEntryTy, 4>>
7891         DeferredInfo;
7892     MapCombinedInfoTy UseDeviceDataCombinedInfo;
7893 
7894     auto &&UseDeviceDataCombinedInfoGen =
7895         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7896                                      CodeGenFunction &CGF, bool IsDevAddr) {
7897           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7898           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7899           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7900           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7901               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7902           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7903           UseDeviceDataCombinedInfo.Sizes.push_back(
7904               llvm::Constant::getNullValue(CGF.Int64Ty));
7905           UseDeviceDataCombinedInfo.Types.push_back(
7906               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7907           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7908         };
7909 
7910     auto &&MapInfoGen =
7911         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7912          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7913                    OMPClauseMappableExprCommon::MappableExprComponentListRef
7914                        Components,
7915                    bool IsImplicit, bool IsDevAddr) {
7916           // We didn't find any match in our map information - generate a zero
7917           // size array section - if the pointer is a struct member we defer
7918           // this action until the whole struct has been processed.
7919           if (isa<MemberExpr>(IE)) {
7920             // Insert the pointer into Info to be processed by
7921             // generateInfoForComponentList. Because it is a member pointer
7922             // without a pointee, no entry will be generated for it, therefore
7923             // we need to generate one after the whole struct has been
7924             // processed. Nonetheless, generateInfoForComponentList must be
7925             // called to take the pointer into account for the calculation of
7926             // the range of the partial struct.
7927             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7928                     std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7929                     nullptr, nullptr, IsDevAddr);
7930             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7931           } else {
7932             llvm::Value *Ptr;
7933             if (IsDevAddr) {
7934               if (IE->isGLValue())
7935                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7936               else
7937                 Ptr = CGF.EmitScalarExpr(IE);
7938             } else {
7939               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7940             }
7941             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7942           }
7943         };
7944 
7945     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7946                                     const Expr *IE, bool IsDevAddr) -> bool {
7947       // We potentially have map information for this declaration already.
7948       // Look for the first set of components that refer to it. If found,
7949       // return true.
7950       // If the first component is a member expression, we have to look into
7951       // 'this', which maps to null in the map of map information. Otherwise
7952       // look directly for the information.
7953       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7954       if (It != Info.end()) {
7955         bool Found = false;
7956         for (auto &Data : It->second) {
7957           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7958             return MI.Components.back().getAssociatedDeclaration() == VD;
7959           });
7960           // If we found a map entry, signal that the pointer has to be
7961           // returned and move on to the next declaration. Exclude cases where
7962           // the base pointer is mapped as array subscript, array section or
7963           // array shaping. The base address is passed as a pointer to base in
7964           // this case and cannot be used as a base for use_device_ptr list
7965           // item.
7966           if (CI != Data.end()) {
7967             if (IsDevAddr) {
7968               CI->ForDeviceAddr = IsDevAddr;
7969               CI->ReturnDevicePointer = true;
7970               Found = true;
7971               break;
7972             } else {
7973               auto PrevCI = std::next(CI->Components.rbegin());
7974               const auto *VarD = dyn_cast<VarDecl>(VD);
7975               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7976                   isa<MemberExpr>(IE) ||
7977                   !VD->getType().getNonReferenceType()->isPointerType() ||
7978                   PrevCI == CI->Components.rend() ||
7979                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7980                   VarD->hasLocalStorage()) {
7981                 CI->ForDeviceAddr = IsDevAddr;
7982                 CI->ReturnDevicePointer = true;
7983                 Found = true;
7984                 break;
7985               }
7986             }
7987           }
7988         }
7989         return Found;
7990       }
7991       return false;
7992     };
7993 
7994     // Look at the use_device_ptr clause information and mark the existing map
7995     // entries as such. If there is no map information for an entry in the
7996     // use_device_ptr list, we create one with map type 'alloc' and zero size
7997     // section. It is the user fault if that was not mapped before. If there is
7998     // no map information and the pointer is a struct member, then we defer the
7999     // emission of that entry until the whole struct has been processed.
8000     for (const auto *Cl : Clauses) {
8001       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8002       if (!C)
8003         continue;
8004       for (const auto L : C->component_lists()) {
8005         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8006             std::get<1>(L);
8007         assert(!Components.empty() &&
8008                "Not expecting empty list of components!");
8009         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8010         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8011         const Expr *IE = Components.back().getAssociatedExpression();
8012         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8013           continue;
8014         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8015                    /*IsDevAddr=*/false);
8016       }
8017     }
8018 
8019     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8020     for (const auto *Cl : Clauses) {
8021       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8022       if (!C)
8023         continue;
8024       for (const auto L : C->component_lists()) {
8025         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8026             std::get<1>(L);
8027         assert(!std::get<1>(L).empty() &&
8028                "Not expecting empty list of components!");
8029         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8030         if (!Processed.insert(VD).second)
8031           continue;
8032         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8033         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8034         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8035           continue;
8036         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8037                    /*IsDevAddr=*/true);
8038       }
8039     }
8040 
8041     for (const auto &Data : Info) {
8042       StructRangeInfoTy PartialStruct;
8043       // Current struct information:
8044       MapCombinedInfoTy CurInfo;
8045       // Current struct base information:
8046       MapCombinedInfoTy StructBaseCurInfo;
8047       const Decl *D = Data.first;
8048       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8049       bool HasMapBasePtr = false;
8050       bool HasMapArraySec = false;
8051       if (VD && VD->getType()->isAnyPointerType()) {
8052         for (const auto &M : Data.second) {
8053           HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8054             return isa_and_present<DeclRefExpr>(L.VarRef);
8055           });
8056           HasMapArraySec = any_of(M, [](const MapInfo &L) {
8057             return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8058                 L.VarRef);
8059           });
8060           if (HasMapBasePtr && HasMapArraySec)
8061             break;
8062         }
8063       }
8064       for (const auto &M : Data.second) {
8065         for (const MapInfo &L : M) {
8066           assert(!L.Components.empty() &&
8067                  "Not expecting declaration with no component lists.");
8068 
8069           // Remember the current base pointer index.
8070           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8071           unsigned StructBasePointersIdx =
8072               StructBaseCurInfo.BasePointers.size();
8073           CurInfo.NonContigInfo.IsNonContiguous =
8074               L.Components.back().isNonContiguous();
8075           generateInfoForComponentList(
8076               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8077               CurInfo, StructBaseCurInfo, PartialStruct,
8078               /*IsFirstComponentList=*/false, L.IsImplicit,
8079               /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8080               L.VarRef, /*OverlappedElements*/ std::nullopt,
8081               HasMapBasePtr && HasMapArraySec);
8082 
8083           // If this entry relates to a device pointer, set the relevant
8084           // declaration and add the 'return pointer' flag.
8085           if (L.ReturnDevicePointer) {
8086             // Check whether a value was added to either CurInfo or
8087             // StructBaseCurInfo and error if no value was added to either of
8088             // them:
8089             assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8090                     StructBasePointersIdx <
8091                         StructBaseCurInfo.BasePointers.size()) &&
8092                    "Unexpected number of mapped base pointers.");
8093 
8094             // Choose a base pointer index which is always valid:
8095             const ValueDecl *RelevantVD =
8096                 L.Components.back().getAssociatedDeclaration();
8097             assert(RelevantVD &&
8098                    "No relevant declaration related with device pointer??");
8099 
8100             // If StructBaseCurInfo has been updated this iteration then work on
8101             // the first new entry added to it i.e. make sure that when multiple
8102             // values are added to any of the lists, the first value added is
8103             // being modified by the assignments below (not the last value
8104             // added).
8105             if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8106               StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8107                   RelevantVD;
8108               StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8109                   L.ForDeviceAddr ? DeviceInfoTy::Address
8110                                   : DeviceInfoTy::Pointer;
8111               StructBaseCurInfo.Types[StructBasePointersIdx] |=
8112                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8113             } else {
8114               CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8115               CurInfo.DevicePointers[CurrentBasePointersIdx] =
8116                   L.ForDeviceAddr ? DeviceInfoTy::Address
8117                                   : DeviceInfoTy::Pointer;
8118               CurInfo.Types[CurrentBasePointersIdx] |=
8119                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8120             }
8121           }
8122         }
8123       }
8124 
8125       // Append any pending zero-length pointers which are struct members and
8126       // used with use_device_ptr or use_device_addr.
8127       auto CI = DeferredInfo.find(Data.first);
8128       if (CI != DeferredInfo.end()) {
8129         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8130           llvm::Value *BasePtr;
8131           llvm::Value *Ptr;
8132           if (L.ForDeviceAddr) {
8133             if (L.IE->isGLValue())
8134               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8135             else
8136               Ptr = this->CGF.EmitScalarExpr(L.IE);
8137             BasePtr = Ptr;
8138             // Entry is RETURN_PARAM. Also, set the placeholder value
8139             // MEMBER_OF=FFFF so that the entry is later updated with the
8140             // correct value of MEMBER_OF.
8141             CurInfo.Types.push_back(
8142                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8143                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8144           } else {
8145             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8146             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8147                                              L.IE->getExprLoc());
8148             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8149             // placeholder value MEMBER_OF=FFFF so that the entry is later
8150             // updated with the correct value of MEMBER_OF.
8151             CurInfo.Types.push_back(
8152                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8153                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8154                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8155           }
8156           CurInfo.Exprs.push_back(L.VD);
8157           CurInfo.BasePointers.emplace_back(BasePtr);
8158           CurInfo.DevicePtrDecls.emplace_back(L.VD);
8159           CurInfo.DevicePointers.emplace_back(
8160               L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8161           CurInfo.Pointers.push_back(Ptr);
8162           CurInfo.Sizes.push_back(
8163               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8164           CurInfo.Mappers.push_back(nullptr);
8165         }
8166       }
8167 
8168       // Unify entries in one list making sure the struct mapping precedes the
8169       // individual fields:
8170       MapCombinedInfoTy UnionCurInfo;
8171       UnionCurInfo.append(StructBaseCurInfo);
8172       UnionCurInfo.append(CurInfo);
8173 
8174       // If there is an entry in PartialStruct it means we have a struct with
8175       // individual members mapped. Emit an extra combined entry.
8176       if (PartialStruct.Base.isValid()) {
8177         UnionCurInfo.NonContigInfo.Dims.push_back(0);
8178         // Emit a combined entry:
8179         emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8180                           /*IsMapThis*/ !VD, OMPBuilder, VD);
8181       }
8182 
8183       // We need to append the results of this capture to what we already have.
8184       CombinedInfo.append(UnionCurInfo);
8185     }
8186     // Append data for use_device_ptr clauses.
8187     CombinedInfo.append(UseDeviceDataCombinedInfo);
8188   }
8189 
8190 public:
8191   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8192       : CurDir(&Dir), CGF(CGF) {
8193     // Extract firstprivate clause information.
8194     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8195       for (const auto *D : C->varlists())
8196         FirstPrivateDecls.try_emplace(
8197             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8198     // Extract implicit firstprivates from uses_allocators clauses.
8199     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8200       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8201         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8202         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8203           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8204                                         /*Implicit=*/true);
8205         else if (const auto *VD = dyn_cast<VarDecl>(
8206                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8207                          ->getDecl()))
8208           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8209       }
8210     }
8211     // Extract device pointer clause information.
8212     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8213       for (auto L : C->component_lists())
8214         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8215     // Extract device addr clause information.
8216     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8217       for (auto L : C->component_lists())
8218         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8219     // Extract map information.
8220     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8221       if (C->getMapType() != OMPC_MAP_to)
8222         continue;
8223       for (auto L : C->component_lists()) {
8224         const ValueDecl *VD = std::get<0>(L);
8225         const auto *RD = VD ? VD->getType()
8226                                   .getCanonicalType()
8227                                   .getNonReferenceType()
8228                                   ->getAsCXXRecordDecl()
8229                             : nullptr;
8230         if (RD && RD->isLambda())
8231           LambdasMap.try_emplace(std::get<0>(L), C);
8232       }
8233     }
8234   }
8235 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive form above, no clause information is pre-extracted here; the
  /// mapper's clauses are processed later through generateAllInfoForMapper.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8239 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Receives the combined entry (appended to its arrays).
  /// \param CurTypes Map flags of the entries already emitted for individual
  ///        members; updated in place (TARGET_PARAM removal, OMPX_HOLD and
  ///        MEMBER_OF propagation).
  /// \param PartialStruct Base address and lowest/highest mapped elements of
  ///        the partially mapped struct.
  /// \param IsMapThis True when the mapped struct is the 'this' object.
  /// \param VD The declaration the combined entry relates to, if any.
  /// \param NotTargetParams When true, the entry is not flagged as a kernel
  ///        argument (TARGET_PARAM).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A combined entry is unnecessary for a single mapped component that does
    // not carry the MEMBER_OF placeholder and is not an array section.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds are the record itself, so
    // the size computed below ((HB+1) - LB) covers exactly one record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    // Detect whether we are mapping 'this' inside a member function of a class
    // that has base classes; the range computation differs in that case.
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      // Size is the size of the complete object type.
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry
    // emitted above represents the struct for argument-passing purposes.
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8338 
8339   /// Generate all the base pointers, section pointers, sizes, map types, and
8340   /// mappers for the extracted mappable expressions (all included in \a
8341   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8342   /// pair of the relevant declaration and index where it occurs is appended to
8343   /// the device pointers info array.
8344   void generateAllInfo(
8345       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8346       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8347           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8348     assert(CurDir.is<const OMPExecutableDirective *>() &&
8349            "Expect a executable directive");
8350     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8351     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8352                               SkipVarSet);
8353   }
8354 
8355   /// Generate all the base pointers, section pointers, sizes, map types, and
8356   /// mappers for the extracted map clauses of user-defined mapper (all included
8357   /// in \a CombinedInfo).
8358   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8359                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8360     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8361            "Expect a declare mapper directive");
8362     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8363     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8364                               OMPBuilder);
8365   }
8366 
8367   /// Emit capture info for lambdas for variables captured by reference.
8368   void generateInfoForLambdaCaptures(
8369       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8370       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8371     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8372     const auto *RD = VDType->getAsCXXRecordDecl();
8373     if (!RD || !RD->isLambda())
8374       return;
8375     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8376                    CGF.getContext().getDeclAlign(VD));
8377     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8378     llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8379     FieldDecl *ThisCapture = nullptr;
8380     RD->getCaptureFields(Captures, ThisCapture);
8381     if (ThisCapture) {
8382       LValue ThisLVal =
8383           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8384       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8385       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8386                                  VDLVal.getPointer(CGF));
8387       CombinedInfo.Exprs.push_back(VD);
8388       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8389       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8390       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8391       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8392       CombinedInfo.Sizes.push_back(
8393           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8394                                     CGF.Int64Ty, /*isSigned=*/true));
8395       CombinedInfo.Types.push_back(
8396           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8397           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8398           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8399           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8400       CombinedInfo.Mappers.push_back(nullptr);
8401     }
8402     for (const LambdaCapture &LC : RD->captures()) {
8403       if (!LC.capturesVariable())
8404         continue;
8405       const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8406       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8407         continue;
8408       auto It = Captures.find(VD);
8409       assert(It != Captures.end() && "Found lambda capture without field.");
8410       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8411       if (LC.getCaptureKind() == LCK_ByRef) {
8412         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8413         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8414                                    VDLVal.getPointer(CGF));
8415         CombinedInfo.Exprs.push_back(VD);
8416         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8417         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8418         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8419         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8420         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8421             CGF.getTypeSize(
8422                 VD->getType().getCanonicalType().getNonReferenceType()),
8423             CGF.Int64Ty, /*isSigned=*/true));
8424       } else {
8425         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8426         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8427                                    VDLVal.getPointer(CGF));
8428         CombinedInfo.Exprs.push_back(VD);
8429         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8430         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8431         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8432         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8433         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8434       }
8435       CombinedInfo.Types.push_back(
8436           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8437           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8438           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8439           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8440       CombinedInfo.Mappers.push_back(nullptr);
8441     }
8442   }
8443 
8444   /// Set correct indices for lambdas captures.
8445   void adjustMemberOfForLambdaCaptures(
8446       llvm::OpenMPIRBuilder &OMPBuilder,
8447       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8448       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8449       MapFlagsArrayTy &Types) const {
8450     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8451       // Set correct member_of idx for all implicit lambda captures.
8452       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8453                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8454                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8455                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8456         continue;
8457       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8458       assert(BasePtr && "Unable to find base lambda address.");
8459       int TgtIdx = -1;
8460       for (unsigned J = I; J > 0; --J) {
8461         unsigned Idx = J - 1;
8462         if (Pointers[Idx] != BasePtr)
8463           continue;
8464         TgtIdx = Idx;
8465         break;
8466       }
8467       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8468       // All other current entries will be MEMBER_OF the combined entry
8469       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8470       // 0xFFFF in the MEMBER_OF field).
8471       OpenMPOffloadMappingFlags MemberOfFlag =
8472           OMPBuilder.getMemberOfFlag(TgtIdx);
8473       OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8474     }
8475   }
8476 
8477   /// Generate the base pointers, section pointers, sizes, map types, and
8478   /// mappers associated to a given capture (all included in \a CombinedInfo).
8479   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8480                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8481                               StructRangeInfoTy &PartialStruct) const {
8482     assert(!Cap->capturesVariableArrayType() &&
8483            "Not expecting to generate map info for a variable array type!");
8484 
8485     // We need to know when we generating information for the first component
8486     const ValueDecl *VD = Cap->capturesThis()
8487                               ? nullptr
8488                               : Cap->getCapturedVar()->getCanonicalDecl();
8489 
8490     // for map(to: lambda): skip here, processing it in
8491     // generateDefaultMapInfo
8492     if (LambdasMap.count(VD))
8493       return;
8494 
8495     // If this declaration appears in a is_device_ptr clause we just have to
8496     // pass the pointer by value. If it is a reference to a declaration, we just
8497     // pass its value.
8498     if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8499       CombinedInfo.Exprs.push_back(VD);
8500       CombinedInfo.BasePointers.emplace_back(Arg);
8501       CombinedInfo.DevicePtrDecls.emplace_back(VD);
8502       CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8503       CombinedInfo.Pointers.push_back(Arg);
8504       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8505           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8506           /*isSigned=*/true));
8507       CombinedInfo.Types.push_back(
8508           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8509           OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8510       CombinedInfo.Mappers.push_back(nullptr);
8511       return;
8512     }
8513 
8514     using MapData =
8515         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8516                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8517                    const ValueDecl *, const Expr *>;
8518     SmallVector<MapData, 4> DeclComponentLists;
8519     // For member fields list in is_device_ptr, store it in
8520     // DeclComponentLists for generating components info.
8521     static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8522     auto It = DevPointersMap.find(VD);
8523     if (It != DevPointersMap.end())
8524       for (const auto &MCL : It->second)
8525         DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8526                                         /*IsImpicit = */ true, nullptr,
8527                                         nullptr);
8528     auto I = HasDevAddrsMap.find(VD);
8529     if (I != HasDevAddrsMap.end())
8530       for (const auto &MCL : I->second)
8531         DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8532                                         /*IsImpicit = */ true, nullptr,
8533                                         nullptr);
8534     assert(CurDir.is<const OMPExecutableDirective *>() &&
8535            "Expect a executable directive");
8536     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8537     bool HasMapBasePtr = false;
8538     bool HasMapArraySec = false;
8539     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8540       const auto *EI = C->getVarRefs().begin();
8541       for (const auto L : C->decl_component_lists(VD)) {
8542         const ValueDecl *VDecl, *Mapper;
8543         // The Expression is not correct if the mapping is implicit
8544         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8545         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8546         std::tie(VDecl, Components, Mapper) = L;
8547         assert(VDecl == VD && "We got information for the wrong declaration??");
8548         assert(!Components.empty() &&
8549                "Not expecting declaration with no component lists.");
8550         if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8551           HasMapBasePtr = true;
8552         if (VD && E && VD->getType()->isAnyPointerType() &&
8553             (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8554           HasMapArraySec = true;
8555         DeclComponentLists.emplace_back(Components, C->getMapType(),
8556                                         C->getMapTypeModifiers(),
8557                                         C->isImplicit(), Mapper, E);
8558         ++EI;
8559       }
8560     }
8561     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8562                                              const MapData &RHS) {
8563       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8564       OpenMPMapClauseKind MapType = std::get<1>(RHS);
8565       bool HasPresent =
8566           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8567       bool HasAllocs = MapType == OMPC_MAP_alloc;
8568       MapModifiers = std::get<2>(RHS);
8569       MapType = std::get<1>(LHS);
8570       bool HasPresentR =
8571           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8572       bool HasAllocsR = MapType == OMPC_MAP_alloc;
8573       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8574     });
8575 
8576     // Find overlapping elements (including the offset from the base element).
8577     llvm::SmallDenseMap<
8578         const MapData *,
8579         llvm::SmallVector<
8580             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8581         4>
8582         OverlappedData;
8583     size_t Count = 0;
8584     for (const MapData &L : DeclComponentLists) {
8585       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8586       OpenMPMapClauseKind MapType;
8587       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8588       bool IsImplicit;
8589       const ValueDecl *Mapper;
8590       const Expr *VarRef;
8591       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8592           L;
8593       ++Count;
8594       for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8595         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8596         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8597                  VarRef) = L1;
8598         auto CI = Components.rbegin();
8599         auto CE = Components.rend();
8600         auto SI = Components1.rbegin();
8601         auto SE = Components1.rend();
8602         for (; CI != CE && SI != SE; ++CI, ++SI) {
8603           if (CI->getAssociatedExpression()->getStmtClass() !=
8604               SI->getAssociatedExpression()->getStmtClass())
8605             break;
8606           // Are we dealing with different variables/fields?
8607           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8608             break;
8609         }
8610         // Found overlapping if, at least for one component, reached the head
8611         // of the components list.
8612         if (CI == CE || SI == SE) {
8613           // Ignore it if it is the same component.
8614           if (CI == CE && SI == SE)
8615             continue;
8616           const auto It = (SI == SE) ? CI : SI;
8617           // If one component is a pointer and another one is a kind of
8618           // dereference of this pointer (array subscript, section, dereference,
8619           // etc.), it is not an overlapping.
8620           // Same, if one component is a base and another component is a
8621           // dereferenced pointer memberexpr with the same base.
8622           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8623               (std::prev(It)->getAssociatedDeclaration() &&
8624                std::prev(It)
8625                    ->getAssociatedDeclaration()
8626                    ->getType()
8627                    ->isPointerType()) ||
8628               (It->getAssociatedDeclaration() &&
8629                It->getAssociatedDeclaration()->getType()->isPointerType() &&
8630                std::next(It) != CE && std::next(It) != SE))
8631             continue;
8632           const MapData &BaseData = CI == CE ? L : L1;
8633           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8634               SI == SE ? Components : Components1;
8635           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8636           OverlappedElements.getSecond().push_back(SubData);
8637         }
8638       }
8639     }
8640     // Sort the overlapped elements for each item.
8641     llvm::SmallVector<const FieldDecl *, 4> Layout;
8642     if (!OverlappedData.empty()) {
8643       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8644       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8645       while (BaseType != OrigType) {
8646         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8647         OrigType = BaseType->getPointeeOrArrayElementType();
8648       }
8649 
8650       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8651         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8652       else {
8653         const auto *RD = BaseType->getAsRecordDecl();
8654         Layout.append(RD->field_begin(), RD->field_end());
8655       }
8656     }
8657     for (auto &Pair : OverlappedData) {
8658       llvm::stable_sort(
8659           Pair.getSecond(),
8660           [&Layout](
8661               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8662               OMPClauseMappableExprCommon::MappableExprComponentListRef
8663                   Second) {
8664             auto CI = First.rbegin();
8665             auto CE = First.rend();
8666             auto SI = Second.rbegin();
8667             auto SE = Second.rend();
8668             for (; CI != CE && SI != SE; ++CI, ++SI) {
8669               if (CI->getAssociatedExpression()->getStmtClass() !=
8670                   SI->getAssociatedExpression()->getStmtClass())
8671                 break;
8672               // Are we dealing with different variables/fields?
8673               if (CI->getAssociatedDeclaration() !=
8674                   SI->getAssociatedDeclaration())
8675                 break;
8676             }
8677 
8678             // Lists contain the same elements.
8679             if (CI == CE && SI == SE)
8680               return false;
8681 
8682             // List with less elements is less than list with more elements.
8683             if (CI == CE || SI == SE)
8684               return CI == CE;
8685 
8686             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8687             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8688             if (FD1->getParent() == FD2->getParent())
8689               return FD1->getFieldIndex() < FD2->getFieldIndex();
8690             const auto *It =
8691                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8692                   return FD == FD1 || FD == FD2;
8693                 });
8694             return *It == FD1;
8695           });
8696     }
8697 
8698     // Associated with a capture, because the mapping flags depend on it.
8699     // Go through all of the elements with the overlapped elements.
8700     bool IsFirstComponentList = true;
8701     MapCombinedInfoTy StructBaseCombinedInfo;
8702     for (const auto &Pair : OverlappedData) {
8703       const MapData &L = *Pair.getFirst();
8704       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8705       OpenMPMapClauseKind MapType;
8706       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8707       bool IsImplicit;
8708       const ValueDecl *Mapper;
8709       const Expr *VarRef;
8710       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8711           L;
8712       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8713           OverlappedComponents = Pair.getSecond();
8714       generateInfoForComponentList(
8715           MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8716           StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8717           IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8718           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8719       IsFirstComponentList = false;
8720     }
8721     // Go through other elements without overlapped elements.
8722     for (const MapData &L : DeclComponentLists) {
8723       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8724       OpenMPMapClauseKind MapType;
8725       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8726       bool IsImplicit;
8727       const ValueDecl *Mapper;
8728       const Expr *VarRef;
8729       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8730           L;
8731       auto It = OverlappedData.find(&L);
8732       if (It == OverlappedData.end())
8733         generateInfoForComponentList(
8734             MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8735             StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8736             IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8737             /*ForDeviceAddr=*/false, VD, VarRef,
8738             /*OverlappedElements*/ std::nullopt,
8739             HasMapBasePtr && HasMapArraySec);
8740       IsFirstComponentList = false;
8741     }
8742   }
8743 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used for captures on a target region that have no explicit map clause.
  /// Appends exactly one entry to each of the parallel arrays in
  /// \a CombinedInfo (Exprs, BasePointers, DevicePtrDecls, DevicePointers,
  /// Pointers, Sizes, Types, Mappers), keeping them in lockstep.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': base and pointer are both the captured value; the
      // size is that of the pointed-to class, and the object is mapped both
      // to and from the device.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // If the variable was seen in a firstprivate clause, reuse the
      // implicitness recorded for it there.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      // Size of the referenced element, in bytes, widened to i64.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: load through the
        // reference so the entry points at the pointer value itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      // Honor the implicitness recorded for firstprivate variables.
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
8825 };
8826 } // anonymous namespace
8827 
8828 // Try to extract the base declaration from a `this->x` expression if possible.
8829 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8830   if (!E)
8831     return nullptr;
8832 
8833   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8834     if (const MemberExpr *ME =
8835             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8836       return ME->getMemberDecl();
8837   return nullptr;
8838 }
8839 
8840 /// Emit a string constant containing the names of the values mapped to the
8841 /// offloading runtime library.
8842 llvm::Constant *
8843 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8844                        MappableExprsHandler::MappingExprInfo &MapExprs) {
8845 
8846   uint32_t SrcLocStrSize;
8847   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8848     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8849 
8850   SourceLocation Loc;
8851   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8852     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8853       Loc = VD->getLocation();
8854     else
8855       Loc = MapExprs.getMapExpr()->getExprLoc();
8856   } else {
8857     Loc = MapExprs.getMapDecl()->getLocation();
8858   }
8859 
8860   std::string ExprName;
8861   if (MapExprs.getMapExpr()) {
8862     PrintingPolicy P(CGF.getContext().getLangOpts());
8863     llvm::raw_string_ostream OS(ExprName);
8864     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8865     OS.flush();
8866   } else {
8867     ExprName = MapExprs.getMapDecl()->getNameAsString();
8868   }
8869 
8870   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8871   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8872                                          PLoc.getLine(), PLoc.getColumn(),
8873                                          SrcLocStrSize);
8874 }
8875 
8876 /// Emit the arrays used to pass the captures and map information to the
8877 /// offloading runtime library. If there is no map or capture information,
8878 /// return nullptr by reference.
8879 static void emitOffloadingArrays(
8880     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8881     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8882     bool IsNonContiguous = false) {
8883   CodeGenModule &CGM = CGF.CGM;
8884 
8885   // Reset the array information.
8886   Info.clearArrayInfo();
8887   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8888 
8889   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8890   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8891                          CGF.AllocaInsertPt->getIterator());
8892   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8893                           CGF.Builder.GetInsertPoint());
8894 
8895   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8896     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8897   };
8898   if (CGM.getCodeGenOpts().getDebugInfo() !=
8899       llvm::codegenoptions::NoDebugInfo) {
8900     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8901     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8902                     FillInfoMap);
8903   }
8904 
8905   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8906     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8907       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8908     }
8909   };
8910 
8911   auto CustomMapperCB = [&](unsigned int I) {
8912     llvm::Value *MFunc = nullptr;
8913     if (CombinedInfo.Mappers[I]) {
8914       Info.HasMapper = true;
8915       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8916           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8917     }
8918     return MFunc;
8919   };
8920   OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8921                                   /*IsNonContiguous=*/true, DeviceAddrCB,
8922                                   CustomMapperCB);
8923 }
8924 
8925 /// Check for inner distribute directive.
8926 static const OMPExecutableDirective *
8927 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8928   const auto *CS = D.getInnermostCapturedStmt();
8929   const auto *Body =
8930       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8931   const Stmt *ChildStmt =
8932       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8933 
8934   if (const auto *NestedDir =
8935           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8936     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8937     switch (D.getDirectiveKind()) {
8938     case OMPD_target:
8939       // For now, treat 'target' with nested 'teams loop' as if it's
8940       // distributed (target teams distribute).
8941       if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8942         return NestedDir;
8943       if (DKind == OMPD_teams) {
8944         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8945             /*IgnoreCaptured=*/true);
8946         if (!Body)
8947           return nullptr;
8948         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8949         if (const auto *NND =
8950                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8951           DKind = NND->getDirectiveKind();
8952           if (isOpenMPDistributeDirective(DKind))
8953             return NND;
8954         }
8955       }
8956       return nullptr;
8957     case OMPD_target_teams:
8958       if (isOpenMPDistributeDirective(DKind))
8959         return NestedDir;
8960       return nullptr;
8961     case OMPD_target_parallel:
8962     case OMPD_target_simd:
8963     case OMPD_target_parallel_for:
8964     case OMPD_target_parallel_for_simd:
8965       return nullptr;
8966     case OMPD_target_teams_distribute:
8967     case OMPD_target_teams_distribute_simd:
8968     case OMPD_target_teams_distribute_parallel_for:
8969     case OMPD_target_teams_distribute_parallel_for_simd:
8970     case OMPD_parallel:
8971     case OMPD_for:
8972     case OMPD_parallel_for:
8973     case OMPD_parallel_master:
8974     case OMPD_parallel_sections:
8975     case OMPD_for_simd:
8976     case OMPD_parallel_for_simd:
8977     case OMPD_cancel:
8978     case OMPD_cancellation_point:
8979     case OMPD_ordered:
8980     case OMPD_threadprivate:
8981     case OMPD_allocate:
8982     case OMPD_task:
8983     case OMPD_simd:
8984     case OMPD_tile:
8985     case OMPD_unroll:
8986     case OMPD_sections:
8987     case OMPD_section:
8988     case OMPD_single:
8989     case OMPD_master:
8990     case OMPD_critical:
8991     case OMPD_taskyield:
8992     case OMPD_barrier:
8993     case OMPD_taskwait:
8994     case OMPD_taskgroup:
8995     case OMPD_atomic:
8996     case OMPD_flush:
8997     case OMPD_depobj:
8998     case OMPD_scan:
8999     case OMPD_teams:
9000     case OMPD_target_data:
9001     case OMPD_target_exit_data:
9002     case OMPD_target_enter_data:
9003     case OMPD_distribute:
9004     case OMPD_distribute_simd:
9005     case OMPD_distribute_parallel_for:
9006     case OMPD_distribute_parallel_for_simd:
9007     case OMPD_teams_distribute:
9008     case OMPD_teams_distribute_simd:
9009     case OMPD_teams_distribute_parallel_for:
9010     case OMPD_teams_distribute_parallel_for_simd:
9011     case OMPD_target_update:
9012     case OMPD_declare_simd:
9013     case OMPD_declare_variant:
9014     case OMPD_begin_declare_variant:
9015     case OMPD_end_declare_variant:
9016     case OMPD_declare_target:
9017     case OMPD_end_declare_target:
9018     case OMPD_declare_reduction:
9019     case OMPD_declare_mapper:
9020     case OMPD_taskloop:
9021     case OMPD_taskloop_simd:
9022     case OMPD_master_taskloop:
9023     case OMPD_master_taskloop_simd:
9024     case OMPD_parallel_master_taskloop:
9025     case OMPD_parallel_master_taskloop_simd:
9026     case OMPD_requires:
9027     case OMPD_metadirective:
9028     case OMPD_unknown:
9029     default:
9030       llvm_unreachable("Unexpected directive.");
9031     }
9032   }
9033 
9034   return nullptr;
9035 }
9036 
9037 /// Emit the user-defined mapper function. The code generation follows the
9038 /// pattern in the example below.
9039 /// \code
9040 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9041 ///                                           void *base, void *begin,
9042 ///                                           int64_t size, int64_t type,
9043 ///                                           void *name = nullptr) {
9044 ///   // Allocate space for an array section first or add a base/begin for
9045 ///   // pointer dereference.
9046 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9047 ///       !maptype.IsDelete)
9048 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9049 ///                                 size*sizeof(Ty), clearToFromMember(type));
9050 ///   // Map members.
9051 ///   for (unsigned i = 0; i < size; i++) {
9052 ///     // For each component specified by this mapper:
9053 ///     for (auto c : begin[i]->all_components) {
9054 ///       if (c.hasMapper())
9055 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9056 ///                       c.arg_type, c.arg_name);
9057 ///       else
9058 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9059 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9060 ///                                     c.arg_name);
9061 ///     }
9062 ///   }
9063 ///   // Delete the array section.
9064 ///   if (size > 1 && maptype.IsDelete)
9065 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9066 ///                                 size*sizeof(Ty), clearToFromMember(type));
9067 /// }
9068 /// \endcode
9069 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9070                                             CodeGenFunction *CGF) {
9071   if (UDMMap.count(D) > 0)
9072     return;
9073   ASTContext &C = CGM.getContext();
9074   QualType Ty = D->getType();
9075   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9076   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9077   auto *MapperVarDecl =
9078       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9079   SourceLocation Loc = D->getLocation();
9080   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9081   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9082 
9083   // Prepare mapper function arguments and attributes.
9084   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9085                               C.VoidPtrTy, ImplicitParamKind::Other);
9086   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9087                             ImplicitParamKind::Other);
9088   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9089                              C.VoidPtrTy, ImplicitParamKind::Other);
9090   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9091                             ImplicitParamKind::Other);
9092   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9093                             ImplicitParamKind::Other);
9094   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9095                             ImplicitParamKind::Other);
9096   FunctionArgList Args;
9097   Args.push_back(&HandleArg);
9098   Args.push_back(&BaseArg);
9099   Args.push_back(&BeginArg);
9100   Args.push_back(&SizeArg);
9101   Args.push_back(&TypeArg);
9102   Args.push_back(&NameArg);
9103   const CGFunctionInfo &FnInfo =
9104       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9105   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9106   SmallString<64> TyStr;
9107   llvm::raw_svector_ostream Out(TyStr);
9108   CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9109   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9110   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9111                                     Name, &CGM.getModule());
9112   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9113   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9114   // Start the mapper function code generation.
9115   CodeGenFunction MapperCGF(CGM);
9116   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9117   // Compute the starting and end addresses of array elements.
9118   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9119       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9120       C.getPointerType(Int64Ty), Loc);
9121   // Prepare common arguments for array initiation and deletion.
9122   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9123       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9124       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9125   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9126       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9127       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9128   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9129       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9130       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9131   // Convert the size in bytes into the number of array elements.
9132   Size = MapperCGF.Builder.CreateExactUDiv(
9133       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9134   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9135       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9136   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9137   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9138       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9139       C.getPointerType(Int64Ty), Loc);
9140   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9141       MapperCGF.GetAddrOfLocalVar(&NameArg),
9142       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9143 
9144   // Emit array initiation if this is an array section and \p MapType indicates
9145   // that memory allocation is required.
9146   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9147   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9148                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9149 
9150   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9151 
9152   // Emit the loop header block.
9153   MapperCGF.EmitBlock(HeadBB);
9154   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9155   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9156   // Evaluate whether the initial condition is satisfied.
9157   llvm::Value *IsEmpty =
9158       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9159   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9160   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9161 
9162   // Emit the loop body block.
9163   MapperCGF.EmitBlock(BodyBB);
9164   llvm::BasicBlock *LastBB = BodyBB;
9165   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9166       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9167   PtrPHI->addIncoming(PtrBegin, EntryBB);
9168   Address PtrCurrent(PtrPHI, ElemTy,
9169                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9170                          .getAlignment()
9171                          .alignmentOfArrayElement(ElementSize));
9172   // Privatize the declared variable of mapper to be the current array element.
9173   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9174   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9175   (void)Scope.Privatize();
9176 
9177   // Get map clause information. Fill up the arrays with all mapped variables.
9178   MappableExprsHandler::MapCombinedInfoTy Info;
9179   MappableExprsHandler MEHandler(*D, MapperCGF);
9180   MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9181 
9182   // Call the runtime API __tgt_mapper_num_components to get the number of
9183   // pre-existing components.
9184   llvm::Value *OffloadingArgs[] = {Handle};
9185   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9186       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9187                                             OMPRTL___tgt_mapper_num_components),
9188       OffloadingArgs);
9189   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9190       PreviousSize,
9191       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9192 
9193   // Fill up the runtime mapper handle for all components.
9194   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9195     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9196         Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9197     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9198         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9199     llvm::Value *CurSizeArg = Info.Sizes[I];
9200     llvm::Value *CurNameArg =
9201         (CGM.getCodeGenOpts().getDebugInfo() ==
9202          llvm::codegenoptions::NoDebugInfo)
9203             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9204             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9205 
9206     // Extract the MEMBER_OF field from the map type.
9207     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9208         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9209             Info.Types[I]));
9210     llvm::Value *MemberMapType =
9211         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9212 
9213     // Combine the map type inherited from user-defined mapper with that
9214     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9215     // bits of the \a MapType, which is the input argument of the mapper
9216     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9217     // bits of MemberMapType.
9218     // [OpenMP 5.0], 1.2.6. map-type decay.
9219     //        | alloc |  to   | from  | tofrom | release | delete
9220     // ----------------------------------------------------------
9221     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9222     // to     | alloc |  to   | alloc |   to   | release | delete
9223     // from   | alloc | alloc | from  |  from  | release | delete
9224     // tofrom | alloc |  to   | from  | tofrom | release | delete
9225     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9226         MapType,
9227         MapperCGF.Builder.getInt64(
9228             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9229                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9230                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9231     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9232     llvm::BasicBlock *AllocElseBB =
9233         MapperCGF.createBasicBlock("omp.type.alloc.else");
9234     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9235     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9236     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9237     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9238     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9239     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9240     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9241     MapperCGF.EmitBlock(AllocBB);
9242     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9243         MemberMapType,
9244         MapperCGF.Builder.getInt64(
9245             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9246                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9247                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9248     MapperCGF.Builder.CreateBr(EndBB);
9249     MapperCGF.EmitBlock(AllocElseBB);
9250     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9251         LeftToFrom,
9252         MapperCGF.Builder.getInt64(
9253             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9254                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9255     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9256     // In case of to, clear OMP_MAP_FROM.
9257     MapperCGF.EmitBlock(ToBB);
9258     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9259         MemberMapType,
9260         MapperCGF.Builder.getInt64(
9261             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9262                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9263     MapperCGF.Builder.CreateBr(EndBB);
9264     MapperCGF.EmitBlock(ToElseBB);
9265     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9266         LeftToFrom,
9267         MapperCGF.Builder.getInt64(
9268             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9269                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9270     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9271     // In case of from, clear OMP_MAP_TO.
9272     MapperCGF.EmitBlock(FromBB);
9273     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9274         MemberMapType,
9275         MapperCGF.Builder.getInt64(
9276             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9277                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9278     // In case of tofrom, do nothing.
9279     MapperCGF.EmitBlock(EndBB);
9280     LastBB = EndBB;
9281     llvm::PHINode *CurMapType =
9282         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9283     CurMapType->addIncoming(AllocMapType, AllocBB);
9284     CurMapType->addIncoming(ToMapType, ToBB);
9285     CurMapType->addIncoming(FromMapType, FromBB);
9286     CurMapType->addIncoming(MemberMapType, ToElseBB);
9287 
9288     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9289                                      CurSizeArg, CurMapType, CurNameArg};
9290     if (Info.Mappers[I]) {
9291       // Call the corresponding mapper function.
9292       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9293           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9294       assert(MapperFunc && "Expect a valid mapper function is available.");
9295       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9296     } else {
9297       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9298       // data structure.
9299       MapperCGF.EmitRuntimeCall(
9300           OMPBuilder.getOrCreateRuntimeFunction(
9301               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9302           OffloadingArgs);
9303     }
9304   }
9305 
9306   // Update the pointer to point to the next element that needs to be mapped,
9307   // and check whether we have mapped all elements.
9308   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9309       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9310   PtrPHI->addIncoming(PtrNext, LastBB);
9311   llvm::Value *IsDone =
9312       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9313   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9314   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9315 
9316   MapperCGF.EmitBlock(ExitBB);
9317   // Emit array deletion if this is an array section and \p MapType indicates
9318   // that deletion is required.
9319   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9320                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9321 
9322   // Emit the function exit block.
9323   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9324   MapperCGF.FinishFunction();
9325   UDMMap.try_emplace(D, Fn);
9326   if (CGF) {
9327     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9328     Decls.second.push_back(D);
9329   }
9330 }
9331 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Emitted block/value names are suffixed with ".init" or ".del" so the two
  // uses of this helper are distinguishable in the generated IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is present when more than one element is covered
  // (signed compare: Size > 1).
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the dynamic map-type argument.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialization applies for an array section, or for a PTR_AND_OBJ entry
    // whose base differs from its begin pointer.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // On the init path the component is only pushed when deletion is NOT
    // requested.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // On the deletion path the component is only pushed when deletion IS
    // requested.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  // Mark the entry as implicitly generated by the compiler.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9407 
9408 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9409     const OMPDeclareMapperDecl *D) {
9410   auto I = UDMMap.find(D);
9411   if (I != UDMMap.end())
9412     return I->second;
9413   emitUserDefinedMapper(D);
9414   return UDMMap.lookup(D);
9415 }
9416 
9417 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9418     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9419     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9420                                      const OMPLoopDirective &D)>
9421         SizeEmitter) {
9422   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9423   const OMPExecutableDirective *TD = &D;
9424   // Get nested teams distribute kind directive, if any. For now, treat
9425   // 'target_teams_loop' as if it's really a target_teams_distribute.
9426   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9427       Kind != OMPD_target_teams_loop)
9428     TD = getNestedDistributeDirective(CGM.getContext(), D);
9429   if (!TD)
9430     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9431 
9432   const auto *LD = cast<OMPLoopDirective>(TD);
9433   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9434     return NumIterations;
9435   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9436 }
9437 
9438 static void
9439 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9440                        const OMPExecutableDirective &D,
9441                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9442                        bool RequiresOuterTask, const CapturedStmt &CS,
9443                        bool OffloadingMandatory, CodeGenFunction &CGF) {
9444   if (OffloadingMandatory) {
9445     CGF.Builder.CreateUnreachable();
9446   } else {
9447     if (RequiresOuterTask) {
9448       CapturedVars.clear();
9449       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9450     }
9451     OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9452                                          CapturedVars);
9453   }
9454 }
9455 
9456 static llvm::Value *emitDeviceID(
9457     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9458     CodeGenFunction &CGF) {
9459   // Emit device ID if any.
9460   llvm::Value *DeviceID;
9461   if (Device.getPointer()) {
9462     assert((Device.getInt() == OMPC_DEVICE_unknown ||
9463             Device.getInt() == OMPC_DEVICE_device_num) &&
9464            "Expected device_num modifier.");
9465     llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9466     DeviceID =
9467         CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9468   } else {
9469     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9470   }
9471   return DeviceID;
9472 }
9473 
9474 llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9475                                CodeGenFunction &CGF) {
9476   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9477 
9478   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9479     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9480     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9481         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9482     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9483                                              /*isSigned=*/false);
9484   }
9485   return DynCGroupMem;
9486 }
9487 
// Emit the offloading (device) code path of a target call: collect the map
// information for all captured variables and map clauses, materialize the
// offloading argument arrays, and emit the kernel launch (optionally wrapped
// in an outer task). InputInfo / MapTypesArray / MapNamesArray are
// out-parameters consumed by an enclosing task-based directive.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  // Walk the captures, the captured-record fields, and the captured values in
  // lockstep.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  // Map-name information is only emitted when some debug info is requested.
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  // Publish the generated arrays through the out-parameters so an enclosing
  // task-based directive can reuse them.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Region generator that emits the actual kernel launch (or falls back to
  // the host version).
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Callback handed to the OpenMPIRBuilder to emit the host fallback at the
    // insert point it provides.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // Gather the remaining kernel-launch operands.
    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    // Let the OpenMPIRBuilder emit the launch sequence; continue emitting at
    // the insert point it returns.
    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  // Emit inside an outer task when required by the directive's clauses.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
9665 
9666 static void
9667 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9668                    const OMPExecutableDirective &D,
9669                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9670                    bool RequiresOuterTask, const CapturedStmt &CS,
9671                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9672 
9673   // Notify that the host version must be executed.
9674   auto &&ElseGen =
9675       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9676        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9677         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9678                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9679       };
9680 
9681   if (RequiresOuterTask) {
9682     CodeGenFunction::OMPTargetDataInfo InputInfo;
9683     CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9684   } else {
9685     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9686   }
9687 }
9688 
9689 void CGOpenMPRuntime::emitTargetCall(
9690     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9691     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9692     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9693     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9694                                      const OMPLoopDirective &D)>
9695         SizeEmitter) {
9696   if (!CGF.HaveInsertPoint())
9697     return;
9698 
9699   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9700                                    CGM.getLangOpts().OpenMPOffloadMandatory;
9701 
9702   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9703 
9704   const bool RequiresOuterTask =
9705       D.hasClausesOfKind<OMPDependClause>() ||
9706       D.hasClausesOfKind<OMPNowaitClause>() ||
9707       D.hasClausesOfKind<OMPInReductionClause>() ||
9708       (CGM.getLangOpts().OpenMP >= 51 &&
9709        needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9710        D.hasClausesOfKind<OMPThreadLimitClause>());
9711   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9712   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9713   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9714                                             PrePostActionTy &) {
9715     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9716   };
9717   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9718 
9719   CodeGenFunction::OMPTargetDataInfo InputInfo;
9720   llvm::Value *MapTypesArray = nullptr;
9721   llvm::Value *MapNamesArray = nullptr;
9722 
9723   auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9724                           RequiresOuterTask, &CS, OffloadingMandatory, Device,
9725                           OutlinedFnID, &InputInfo, &MapTypesArray,
9726                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9727                                                        PrePostActionTy &) {
9728     emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9729                                RequiresOuterTask, CS, OffloadingMandatory,
9730                                Device, OutlinedFnID, InputInfo, MapTypesArray,
9731                                MapNamesArray, SizeEmitter, CGF, CGM);
9732   };
9733 
9734   auto &&TargetElseGen =
9735       [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9736        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9737         emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9738                            CS, OffloadingMandatory, CGF);
9739       };
9740 
9741   // If we have a target function ID it means that we need to support
9742   // offloading, otherwise, just execute on the host. We need to execute on host
9743   // regardless of the conditional in the if clause if, e.g., the user do not
9744   // specify target triples.
9745   if (OutlinedFnID) {
9746     if (IfCond) {
9747       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9748     } else {
9749       RegionCodeGenTy ThenRCG(TargetThenGen);
9750       ThenRCG(CGF);
9751     }
9752   } else {
9753     RegionCodeGenTy ElseRCG(TargetElseGen);
9754     ElseRCG(CGF);
9755   }
9756 }
9757 
9758 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9759                                                     StringRef ParentName) {
9760   if (!S)
9761     return;
9762 
9763   // Codegen OMP target directives that offload compute to the device.
9764   bool RequiresDeviceCodegen =
9765       isa<OMPExecutableDirective>(S) &&
9766       isOpenMPTargetExecutionDirective(
9767           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9768 
9769   if (RequiresDeviceCodegen) {
9770     const auto &E = *cast<OMPExecutableDirective>(S);
9771 
9772     llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9773         CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9774 
9775     // Is this a target region that should not be emitted as an entry point? If
9776     // so just signal we are done with this target region.
9777     if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9778       return;
9779 
9780     switch (E.getDirectiveKind()) {
9781     case OMPD_target:
9782       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9783                                                    cast<OMPTargetDirective>(E));
9784       break;
9785     case OMPD_target_parallel:
9786       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9787           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9788       break;
9789     case OMPD_target_teams:
9790       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9791           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9792       break;
9793     case OMPD_target_teams_distribute:
9794       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9795           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9796       break;
9797     case OMPD_target_teams_distribute_simd:
9798       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9799           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9800       break;
9801     case OMPD_target_parallel_for:
9802       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9803           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9804       break;
9805     case OMPD_target_parallel_for_simd:
9806       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9807           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9808       break;
9809     case OMPD_target_simd:
9810       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9811           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9812       break;
9813     case OMPD_target_teams_distribute_parallel_for:
9814       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9815           CGM, ParentName,
9816           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9817       break;
9818     case OMPD_target_teams_distribute_parallel_for_simd:
9819       CodeGenFunction::
9820           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9821               CGM, ParentName,
9822               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9823       break;
9824     case OMPD_target_teams_loop:
9825       CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9826           CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9827       break;
9828     case OMPD_target_parallel_loop:
9829       CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9830           CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9831       break;
9832     case OMPD_parallel:
9833     case OMPD_for:
9834     case OMPD_parallel_for:
9835     case OMPD_parallel_master:
9836     case OMPD_parallel_sections:
9837     case OMPD_for_simd:
9838     case OMPD_parallel_for_simd:
9839     case OMPD_cancel:
9840     case OMPD_cancellation_point:
9841     case OMPD_ordered:
9842     case OMPD_threadprivate:
9843     case OMPD_allocate:
9844     case OMPD_task:
9845     case OMPD_simd:
9846     case OMPD_tile:
9847     case OMPD_unroll:
9848     case OMPD_sections:
9849     case OMPD_section:
9850     case OMPD_single:
9851     case OMPD_master:
9852     case OMPD_critical:
9853     case OMPD_taskyield:
9854     case OMPD_barrier:
9855     case OMPD_taskwait:
9856     case OMPD_taskgroup:
9857     case OMPD_atomic:
9858     case OMPD_flush:
9859     case OMPD_depobj:
9860     case OMPD_scan:
9861     case OMPD_teams:
9862     case OMPD_target_data:
9863     case OMPD_target_exit_data:
9864     case OMPD_target_enter_data:
9865     case OMPD_distribute:
9866     case OMPD_distribute_simd:
9867     case OMPD_distribute_parallel_for:
9868     case OMPD_distribute_parallel_for_simd:
9869     case OMPD_teams_distribute:
9870     case OMPD_teams_distribute_simd:
9871     case OMPD_teams_distribute_parallel_for:
9872     case OMPD_teams_distribute_parallel_for_simd:
9873     case OMPD_target_update:
9874     case OMPD_declare_simd:
9875     case OMPD_declare_variant:
9876     case OMPD_begin_declare_variant:
9877     case OMPD_end_declare_variant:
9878     case OMPD_declare_target:
9879     case OMPD_end_declare_target:
9880     case OMPD_declare_reduction:
9881     case OMPD_declare_mapper:
9882     case OMPD_taskloop:
9883     case OMPD_taskloop_simd:
9884     case OMPD_master_taskloop:
9885     case OMPD_master_taskloop_simd:
9886     case OMPD_parallel_master_taskloop:
9887     case OMPD_parallel_master_taskloop_simd:
9888     case OMPD_requires:
9889     case OMPD_metadirective:
9890     case OMPD_unknown:
9891     default:
9892       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9893     }
9894     return;
9895   }
9896 
9897   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9898     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9899       return;
9900 
9901     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9902     return;
9903   }
9904 
9905   // If this is a lambda function, look into its body.
9906   if (const auto *L = dyn_cast<LambdaExpr>(S))
9907     S = L->getBody();
9908 
9909   // Keep looking for target regions recursively.
9910   for (const Stmt *II : S->children())
9911     scanForTargetRegionsFunctions(II, ParentName);
9912 }
9913 
9914 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9915   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9916       OMPDeclareTargetDeclAttr::getDeviceType(VD);
9917   if (!DevTy)
9918     return false;
9919   // Do not emit device_type(nohost) functions for the host.
9920   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9921     return true;
9922   // Do not emit device_type(host) functions for the device.
9923   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9924     return true;
9925   return false;
9926 }
9927 
9928 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9929   // If emitting code for the host, we do not process FD here. Instead we do
9930   // the normal code generation.
9931   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9932     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9933       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9934                                   CGM.getLangOpts().OpenMPIsTargetDevice))
9935         return true;
9936     return false;
9937   }
9938 
9939   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9940   // Try to detect target regions in the function.
9941   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9942     StringRef Name = CGM.getMangledName(GD);
9943     scanForTargetRegionsFunctions(FD->getBody(), Name);
9944     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9945                                 CGM.getLangOpts().OpenMPIsTargetDevice))
9946       return true;
9947   }
9948 
9949   // Do not to emit function if it is not marked as declare target.
9950   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9951          AlreadyEmittedTargetDecls.count(VD) == 0;
9952 }
9953 
9954 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9955   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9956                               CGM.getLangOpts().OpenMPIsTargetDevice))
9957     return true;
9958 
9959   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9960     return false;
9961 
9962   // Check if there are Ctors/Dtors in this declaration and look for target
9963   // regions in it. We use the complete variant to produce the kernel name
9964   // mangling.
9965   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9966   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9967     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9968       StringRef ParentName =
9969           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9970       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9971     }
9972     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9973       StringRef ParentName =
9974           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9975       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9976     }
9977   }
9978 
9979   // Do not to emit variable if it is not marked as declare target.
9980   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9981       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9982           cast<VarDecl>(GD.getDecl()));
9983   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9984       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9985         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9986        HasRequiresUnifiedSharedMemory)) {
9987     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9988     return true;
9989   }
9990   return false;
9991 }
9992 
// Registers the declare-target variable \p VD (emitted at \p Addr) with the
// OpenMPIRBuilder so the proper offloading entry / reference pointer is
// created; non-declare-target device globals are only recorded by name.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when there is no offloading at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  // Lazy accessors: the builder only materializes the address/linkage when it
  // actually needs them.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  // Reference globals the builder may create (e.g. for 'link' variables) must
  // be kept alive via llvm.compiler.used.
  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}
10039 
10040 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10041   if (isa<FunctionDecl>(GD.getDecl()) ||
10042       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10043     return emitTargetFunctions(GD);
10044 
10045   return emitTargetGlobalVariable(GD);
10046 }
10047 
10048 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10049   for (const VarDecl *VD : DeferredGlobalVariables) {
10050     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10051         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10052     if (!Res)
10053       continue;
10054     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10055          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10056         !HasRequiresUnifiedSharedMemory) {
10057       CGM.EmitGlobal(VD);
10058     } else {
10059       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10060               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10061                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10062                HasRequiresUnifiedSharedMemory)) &&
10063              "Expected link clause or to clause with unified memory.");
10064       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10065     }
10066   }
10067 }
10068 
// Base implementation is a no-op apart from the sanity check that \p D really
// is a target-based executable directive; device-specific runtimes provide
// the actual lambda-capture adjustment.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10074 
10075 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10076   for (const OMPClause *Clause : D->clauselists()) {
10077     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10078       HasRequiresUnifiedSharedMemory = true;
10079       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10080     } else if (const auto *AC =
10081                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10082       switch (AC->getAtomicDefaultMemOrderKind()) {
10083       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10084         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10085         break;
10086       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10087         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10088         break;
10089       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10090         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10091         break;
10092       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10093         break;
10094       }
10095     }
10096   }
10097 }
10098 
// Returns the default atomic ordering recorded by processRequiresDirective()
// from 'requires atomic_default_mem_order(...)'.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10102 
10103 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10104                                                        LangAS &AS) {
10105   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10106     return false;
10107   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10108   switch(A->getAllocatorType()) {
10109   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10110   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10111   // Not supported, fallback to the default mem space.
10112   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10113   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10114   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10115   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10116   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10117   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10118   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10119     AS = LangAS::Default;
10120     return true;
10121   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10122     llvm_unreachable("Expected predefined allocator for the variables with the "
10123                      "static storage.");
10124   }
10125   return false;
10126 }
10127 
// True if a 'requires unified_shared_memory' clause was recorded by
// processRequiresDirective().
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10131 
// RAII: while compiling for a target device, temporarily disables the
// automatic ShouldMarkAsGlobal behavior; the saved flag is restored by the
// destructor. No-op for host compilation.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
10140 
// Restores the ShouldMarkAsGlobal flag saved by the constructor (device
// compilation only, mirroring the constructor's condition).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10145 
// Returns true when the function \p GD must not be (re-)emitted on the device:
// auto-marking is disabled, the function was already handled, or it is a
// declare-target function whose emission is tracked separately.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Consider it emitted only if the LLVM function already has a body.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First time we see this decl: record it and allow emission (insert()
  // returns true for a new element, so the result below is false).
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10165 
10166 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10167                                     const OMPExecutableDirective &D,
10168                                     SourceLocation Loc,
10169                                     llvm::Function *OutlinedFn,
10170                                     ArrayRef<llvm::Value *> CapturedVars) {
10171   if (!CGF.HaveInsertPoint())
10172     return;
10173 
10174   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10175   CodeGenFunction::RunCleanupsScope Scope(CGF);
10176 
10177   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10178   llvm::Value *Args[] = {
10179       RTLoc,
10180       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10181       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10182   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10183   RealArgs.append(std::begin(Args), std::end(Args));
10184   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10185 
10186   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10187       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10188   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10189 }
10190 
// Emits __kmpc_push_num_teams to communicate the num_teams/thread_limit
// clause values to the runtime; a value of 0 is passed for an absent clause.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Both values are signed i32; 0 means "clause not present".
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10219 
10220 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10221                                             const Expr *ThreadLimit,
10222                                             SourceLocation Loc) {
10223   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10224   llvm::Value *ThreadLimitVal =
10225       ThreadLimit
10226           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10227                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10228           : CGF.Builder.getInt32(0);
10229 
10230   // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10231   llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10232                                     ThreadLimitVal};
10233   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10234                           CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10235                       ThreadLimitArgs);
10236 }
10237 
// Emits a 'target data' region through OMPBuilder.createTargetData: map
// arrays, optional if/device clauses, and the (possibly duplicated) body via
// the callbacks below. Info carries the mapping state across begin/end.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // 'if' clause condition, evaluated up front (nullptr when absent).
  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Called by the builder to collect map-clause info at the given insert
  // point; also fills in the per-entry debug names when debug info is on.
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Emits the region body. The builder may call this up to three times
  // (privatized, duplicated non-privatized, non-privatized); which variant
  // actually emits code depends on whether device-address captures exist.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Records the new device address for use_device_ptr/use_device_addr decls.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Returns the user-defined mapper function for entry I, if one exists.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
10338 
// Emits a standalone 'target enter data' / 'target exit data' /
// 'target update' directive: builds the offloading arrays, selects the
// matching __tgt_target_data_* runtime entry (nowait variant when the
// 'nowait' clause is present), and wraps the call in a task when depend or
// nowait clauses require it.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below; captured by reference in ThenGen so the
  // runtime call sees the arrays after they were created.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is ruled out by the assert above; the
    // exhaustive list keeps -Wswitch useful when new kinds are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    if (HasNowait) {
      // Nowait variants take four extra (here unused/null) dependence args.
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, then either emits ThenGen inline or wraps
  // it in a task when depend/nowait clauses are present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Honor the 'if' clause: with a false condition the directive is a no-op,
  // so the else branch emits nothing.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10523 
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// The mangling letter for each kind is emitted by mangleVectorParameters().
enum ParamKindTy {
  Linear,     // mangled 'l'
  LinearRef,  // mangled 'R'
  LinearUVal, // mangled 'U'
  LinearVal,  // mangled 'L'
  Uniform,    // mangled 'u'
  Vector,     // mangled 'v' (default when no clause applies)
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  // Linear step, or (when HasVarStride) the value mangled after 's'.
  llvm::APSInt StrideOrArg;
  // Alignment mangled after 'a'; omitted from the mangling when zero.
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace
10542 
10543 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10544                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10545   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10546   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10547   // of that clause. The VLEN value must be power of 2.
10548   // In other case the notion of the function`s "characteristic data type" (CDT)
10549   // is used to compute the vector length.
10550   // CDT is defined in the following order:
10551   //   a) For non-void function, the CDT is the return type.
10552   //   b) If the function has any non-uniform, non-linear parameters, then the
10553   //   CDT is the type of the first such parameter.
10554   //   c) If the CDT determined by a) or b) above is struct, union, or class
10555   //   type which is pass-by-value (except for the type that maps to the
10556   //   built-in complex data type), the characteristic data type is int.
10557   //   d) If none of the above three cases is applicable, the CDT is int.
10558   // The VLEN is then determined based on the CDT and the size of vector
10559   // register of that ISA for which current vector version is generated. The
10560   // VLEN is computed using the formula below:
10561   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10562   // where vector register size specified in section 3.2.1 Registers and the
10563   // Stack Frame of original AMD64 ABI document.
10564   QualType RetType = FD->getReturnType();
10565   if (RetType.isNull())
10566     return 0;
10567   ASTContext &C = FD->getASTContext();
10568   QualType CDT;
10569   if (!RetType.isNull() && !RetType->isVoidType()) {
10570     CDT = RetType;
10571   } else {
10572     unsigned Offset = 0;
10573     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10574       if (ParamAttrs[Offset].Kind == Vector)
10575         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10576       ++Offset;
10577     }
10578     if (CDT.isNull()) {
10579       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10580         if (ParamAttrs[I + Offset].Kind == Vector) {
10581           CDT = FD->getParamDecl(I)->getType();
10582           break;
10583         }
10584       }
10585     }
10586   }
10587   if (CDT.isNull())
10588     CDT = C.IntTy;
10589   CDT = CDT->getCanonicalTypeUnqualified();
10590   if (CDT->isRecordType() || CDT->isUnionType())
10591     CDT = C.IntTy;
10592   return C.getTypeSize(CDT);
10593 }
10594 
10595 /// Mangle the parameter part of the vector function name according to
10596 /// their OpenMP classification. The mangling function is defined in
10597 /// section 4.5 of the AAVFABI(2021Q1).
10598 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10599   SmallString<256> Buffer;
10600   llvm::raw_svector_ostream Out(Buffer);
10601   for (const auto &ParamAttr : ParamAttrs) {
10602     switch (ParamAttr.Kind) {
10603     case Linear:
10604       Out << 'l';
10605       break;
10606     case LinearRef:
10607       Out << 'R';
10608       break;
10609     case LinearUVal:
10610       Out << 'U';
10611       break;
10612     case LinearVal:
10613       Out << 'L';
10614       break;
10615     case Uniform:
10616       Out << 'u';
10617       break;
10618     case Vector:
10619       Out << 'v';
10620       break;
10621     }
10622     if (ParamAttr.HasVarStride)
10623       Out << "s" << ParamAttr.StrideOrArg;
10624     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10625              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10626       // Don't print the step value if it is not present or if it is
10627       // equal to 1.
10628       if (ParamAttr.StrideOrArg < 0)
10629         Out << 'n' << -ParamAttr.StrideOrArg;
10630       else if (ParamAttr.StrideOrArg != 1)
10631         Out << ParamAttr.StrideOrArg;
10632     }
10633 
10634     if (!!ParamAttr.Alignment)
10635       Out << 'a' << ParamAttr.Alignment;
10636   }
10637 
10638   return std::string(Out.str());
10639 }
10640 
10641 static void
10642 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10643                            const llvm::APSInt &VLENVal,
10644                            ArrayRef<ParamAttrTy> ParamAttrs,
10645                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10646   struct ISADataTy {
10647     char ISA;
10648     unsigned VecRegSize;
10649   };
10650   ISADataTy ISAData[] = {
10651       {
10652           'b', 128
10653       }, // SSE
10654       {
10655           'c', 256
10656       }, // AVX
10657       {
10658           'd', 256
10659       }, // AVX2
10660       {
10661           'e', 512
10662       }, // AVX512
10663   };
10664   llvm::SmallVector<char, 2> Masked;
10665   switch (State) {
10666   case OMPDeclareSimdDeclAttr::BS_Undefined:
10667     Masked.push_back('N');
10668     Masked.push_back('M');
10669     break;
10670   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10671     Masked.push_back('N');
10672     break;
10673   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10674     Masked.push_back('M');
10675     break;
10676   }
10677   for (char Mask : Masked) {
10678     for (const ISADataTy &Data : ISAData) {
10679       SmallString<256> Buffer;
10680       llvm::raw_svector_ostream Out(Buffer);
10681       Out << "_ZGV" << Data.ISA << Mask;
10682       if (!VLENVal) {
10683         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10684         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10685         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10686       } else {
10687         Out << VLENVal;
10688       }
10689       Out << mangleVectorParameters(ParamAttrs);
10690       Out << '_' << Fn->getName();
10691       Fn->addFnAttr(Out.str());
10692     }
10693   }
10694 }
10695 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10701 
10702 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10703 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10704   QT = QT.getCanonicalType();
10705 
10706   if (QT->isVoidType())
10707     return false;
10708 
10709   if (Kind == ParamKindTy::Uniform)
10710     return false;
10711 
10712   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10713     return false;
10714 
10715   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10716       !QT->isReferenceType())
10717     return false;
10718 
10719   return true;
10720 }
10721 
10722 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10723 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10724   QT = QT.getCanonicalType();
10725   unsigned Size = C.getTypeSize(QT);
10726 
10727   // Only scalars and complex within 16 bytes wide set PVB to true.
10728   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10729     return false;
10730 
10731   if (QT->isFloatingType())
10732     return true;
10733 
10734   if (QT->isIntegerType())
10735     return true;
10736 
10737   if (QT->isPointerType())
10738     return true;
10739 
10740   // TODO: Add support for complex types (section 3.1.2, item 2).
10741 
10742   return false;
10743 }
10744 
10745 /// Computes the lane size (LS) of a return type or of an input parameter,
10746 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10747 /// TODO: Add support for references, section 3.2.1, item 1.
10748 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10749   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10750     QualType PTy = QT.getCanonicalType()->getPointeeType();
10751     if (getAArch64PBV(PTy, C))
10752       return C.getTypeSize(PTy);
10753   }
10754   if (getAArch64PBV(QT, C))
10755     return C.getTypeSize(QT);
10756 
10757   return C.getTypeSize(C.getUIntPtrType());
10758 }
10759 
10760 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10761 // signature of the scalar function, as defined in 3.2.2 of the
10762 // AAVFABI.
10763 static std::tuple<unsigned, unsigned, bool>
10764 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10765   QualType RetType = FD->getReturnType().getCanonicalType();
10766 
10767   ASTContext &C = FD->getASTContext();
10768 
10769   bool OutputBecomesInput = false;
10770 
10771   llvm::SmallVector<unsigned, 8> Sizes;
10772   if (!RetType->isVoidType()) {
10773     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10774     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10775       OutputBecomesInput = true;
10776   }
10777   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10778     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10779     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10780   }
10781 
10782   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10783   // The LS of a function parameter / return value can only be a power
10784   // of 2, starting from 8 bits, up to 128.
10785   assert(llvm::all_of(Sizes,
10786                       [](unsigned Size) {
10787                         return Size == 8 || Size == 16 || Size == 32 ||
10788                                Size == 64 || Size == 128;
10789                       }) &&
10790          "Invalid size");
10791 
10792   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10793                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10794                          OutputBecomesInput);
10795 }
10796 
10797 // Function used to add the attribute. The parameter `VLEN` is
10798 // templated to allow the use of "x" when targeting scalable functions
10799 // for SVE.
10800 template <typename T>
10801 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10802                                  char ISA, StringRef ParSeq,
10803                                  StringRef MangledName, bool OutputBecomesInput,
10804                                  llvm::Function *Fn) {
10805   SmallString<256> Buffer;
10806   llvm::raw_svector_ostream Out(Buffer);
10807   Out << Prefix << ISA << LMask << VLEN;
10808   if (OutputBecomesInput)
10809     Out << "v";
10810   Out << ParSeq << "_" << MangledName;
10811   Fn->addFnAttr(Out.str());
10812 }
10813 
10814 // Helper function to generate the Advanced SIMD names depending on
10815 // the value of the NDS when simdlen is not present.
10816 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10817                                       StringRef Prefix, char ISA,
10818                                       StringRef ParSeq, StringRef MangledName,
10819                                       bool OutputBecomesInput,
10820                                       llvm::Function *Fn) {
10821   switch (NDS) {
10822   case 8:
10823     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10824                          OutputBecomesInput, Fn);
10825     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10826                          OutputBecomesInput, Fn);
10827     break;
10828   case 16:
10829     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10830                          OutputBecomesInput, Fn);
10831     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10832                          OutputBecomesInput, Fn);
10833     break;
10834   case 32:
10835     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10836                          OutputBecomesInput, Fn);
10837     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10838                          OutputBecomesInput, Fn);
10839     break;
10840   case 64:
10841   case 128:
10842     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10843                          OutputBecomesInput, Fn);
10844     break;
10845   default:
10846     llvm_unreachable("Scalar type is too wide.");
10847   }
10848 }
10849 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \param UserVLEN the value of the simdlen clause, or 0 when none was given.
/// \param ISA 's' for SVE, 'n' for Advanced SIMD (see the asserts below).
/// \param VecRegSize vector register size in bits (unused in this routine).
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: at most 2048 bits, in multiples of 128 bits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No [not]inbranch clause: emit both unmasked ("N") and masked ("M").
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length, mangled "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10958 
10959 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10960                                               llvm::Function *Fn) {
10961   ASTContext &C = CGM.getContext();
10962   FD = FD->getMostRecentDecl();
10963   while (FD) {
10964     // Map params to their positions in function decl.
10965     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10966     if (isa<CXXMethodDecl>(FD))
10967       ParamPositions.try_emplace(FD, 0);
10968     unsigned ParamPos = ParamPositions.size();
10969     for (const ParmVarDecl *P : FD->parameters()) {
10970       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10971       ++ParamPos;
10972     }
10973     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10974       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10975       // Mark uniform parameters.
10976       for (const Expr *E : Attr->uniforms()) {
10977         E = E->IgnoreParenImpCasts();
10978         unsigned Pos;
10979         if (isa<CXXThisExpr>(E)) {
10980           Pos = ParamPositions[FD];
10981         } else {
10982           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10983                                 ->getCanonicalDecl();
10984           auto It = ParamPositions.find(PVD);
10985           assert(It != ParamPositions.end() && "Function parameter not found");
10986           Pos = It->second;
10987         }
10988         ParamAttrs[Pos].Kind = Uniform;
10989       }
10990       // Get alignment info.
10991       auto *NI = Attr->alignments_begin();
10992       for (const Expr *E : Attr->aligneds()) {
10993         E = E->IgnoreParenImpCasts();
10994         unsigned Pos;
10995         QualType ParmTy;
10996         if (isa<CXXThisExpr>(E)) {
10997           Pos = ParamPositions[FD];
10998           ParmTy = E->getType();
10999         } else {
11000           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11001                                 ->getCanonicalDecl();
11002           auto It = ParamPositions.find(PVD);
11003           assert(It != ParamPositions.end() && "Function parameter not found");
11004           Pos = It->second;
11005           ParmTy = PVD->getType();
11006         }
11007         ParamAttrs[Pos].Alignment =
11008             (*NI)
11009                 ? (*NI)->EvaluateKnownConstInt(C)
11010                 : llvm::APSInt::getUnsigned(
11011                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11012                           .getQuantity());
11013         ++NI;
11014       }
11015       // Mark linear parameters.
11016       auto *SI = Attr->steps_begin();
11017       auto *MI = Attr->modifiers_begin();
11018       for (const Expr *E : Attr->linears()) {
11019         E = E->IgnoreParenImpCasts();
11020         unsigned Pos;
11021         bool IsReferenceType = false;
11022         // Rescaling factor needed to compute the linear parameter
11023         // value in the mangled name.
11024         unsigned PtrRescalingFactor = 1;
11025         if (isa<CXXThisExpr>(E)) {
11026           Pos = ParamPositions[FD];
11027           auto *P = cast<PointerType>(E->getType());
11028           PtrRescalingFactor = CGM.getContext()
11029                                    .getTypeSizeInChars(P->getPointeeType())
11030                                    .getQuantity();
11031         } else {
11032           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11033                                 ->getCanonicalDecl();
11034           auto It = ParamPositions.find(PVD);
11035           assert(It != ParamPositions.end() && "Function parameter not found");
11036           Pos = It->second;
11037           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11038             PtrRescalingFactor = CGM.getContext()
11039                                      .getTypeSizeInChars(P->getPointeeType())
11040                                      .getQuantity();
11041           else if (PVD->getType()->isReferenceType()) {
11042             IsReferenceType = true;
11043             PtrRescalingFactor =
11044                 CGM.getContext()
11045                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11046                     .getQuantity();
11047           }
11048         }
11049         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11050         if (*MI == OMPC_LINEAR_ref)
11051           ParamAttr.Kind = LinearRef;
11052         else if (*MI == OMPC_LINEAR_uval)
11053           ParamAttr.Kind = LinearUVal;
11054         else if (IsReferenceType)
11055           ParamAttr.Kind = LinearVal;
11056         else
11057           ParamAttr.Kind = Linear;
11058         // Assuming a stride of 1, for `linear` without modifiers.
11059         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11060         if (*SI) {
11061           Expr::EvalResult Result;
11062           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11063             if (const auto *DRE =
11064                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11065               if (const auto *StridePVD =
11066                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11067                 ParamAttr.HasVarStride = true;
11068                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11069                 assert(It != ParamPositions.end() &&
11070                        "Function parameter not found");
11071                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11072               }
11073             }
11074           } else {
11075             ParamAttr.StrideOrArg = Result.Val.getInt();
11076           }
11077         }
11078         // If we are using a linear clause on a pointer, we need to
11079         // rescale the value of linear_step with the byte size of the
11080         // pointee type.
11081         if (!ParamAttr.HasVarStride &&
11082             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11083           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11084         ++SI;
11085         ++MI;
11086       }
11087       llvm::APSInt VLENVal;
11088       SourceLocation ExprLoc;
11089       const Expr *VLENExpr = Attr->getSimdlen();
11090       if (VLENExpr) {
11091         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11092         ExprLoc = VLENExpr->getExprLoc();
11093       }
11094       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11095       if (CGM.getTriple().isX86()) {
11096         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11097       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11098         unsigned VLEN = VLENVal.getExtValue();
11099         StringRef MangledName = Fn->getName();
11100         if (CGM.getTarget().hasFeature("sve"))
11101           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11102                                          MangledName, 's', 128, Fn, ExprLoc);
11103         else if (CGM.getTarget().hasFeature("neon"))
11104           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11105                                          MangledName, 'n', 128, Fn, ExprLoc);
11106       }
11107     }
11108     FD = FD->getPreviousDecl();
11109   }
11110 }
11111 
11112 namespace {
11113 /// Cleanup action for doacross support.
11114 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11115 public:
11116   static const int DoacrossFinArgs = 2;
11117 
11118 private:
11119   llvm::FunctionCallee RTLFn;
11120   llvm::Value *Args[DoacrossFinArgs];
11121 
11122 public:
11123   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11124                     ArrayRef<llvm::Value *> CallArgs)
11125       : RTLFn(RTLFn) {
11126     assert(CallArgs.size() == DoacrossFinArgs);
11127     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11128   }
11129   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11130     if (!CGF.HaveInsertPoint())
11131       return;
11132     CGF.EmitRuntimeCall(RTLFn, Args);
11133   }
11134 };
11135 } // namespace
11136 
/// Emit doacross initialization for loop directive \p D: build an on-stack
/// array of kmp_dim records (one per entry in \p NumIterations), fill in the
/// upper bound and a stride of 1 for each dimension, call
/// __kmpc_doacross_init, and push a cleanup that emits __kmpc_doacross_fini
/// on both normal and exceptional exits.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built lazily once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  // Zero-initialize the whole array; the 'lo' (lower bound) fields stay 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini(loc, gtid) call as a cleanup.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
11207 
11208 template <typename T>
11209 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11210                                 const T *C, llvm::Value *ULoc,
11211                                 llvm::Value *ThreadID) {
11212   QualType Int64Ty =
11213       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11214   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11215   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11216       Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11217   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11218   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11219     const Expr *CounterVal = C->getLoopData(I);
11220     assert(CounterVal);
11221     llvm::Value *CntVal = CGF.EmitScalarConversion(
11222         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11223         CounterVal->getExprLoc());
11224     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11225                           /*Volatile=*/false, Int64Ty);
11226   }
11227   llvm::Value *Args[] = {
11228       ULoc, ThreadID,
11229       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11230   llvm::FunctionCallee RTLFn;
11231   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11232   OMPDoacrossKind<T> ODK;
11233   if (ODK.isSource(C)) {
11234     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11235                                                   OMPRTL___kmpc_doacross_post);
11236   } else {
11237     assert(ODK.isSink(C) && "Expect sink modifier.");
11238     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11239                                                   OMPRTL___kmpc_doacross_wait);
11240   }
11241   CGF.EmitRuntimeCall(RTLFn, Args);
11242 }
11243 
11244 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11245                                           const OMPDependClause *C) {
11246   return EmitDoacrossOrdered<OMPDependClause>(
11247       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11248       getThreadID(CGF, C->getBeginLoc()));
11249 }
11250 
11251 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11252                                           const OMPDoacrossClause *C) {
11253   return EmitDoacrossOrdered<OMPDoacrossClause>(
11254       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11255       getThreadID(CGF, C->getBeginLoc()));
11256 }
11257 
11258 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11259                                llvm::FunctionCallee Callee,
11260                                ArrayRef<llvm::Value *> Args) const {
11261   assert(Loc.isValid() && "Outlined function call location must be valid.");
11262   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11263 
11264   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11265     if (Fn->doesNotThrow()) {
11266       CGF.EmitNounwindRuntimeCall(Fn, Args);
11267       return;
11268     }
11269   }
11270   CGF.EmitRuntimeCall(Callee, Args);
11271 }
11272 
/// Emit a call to an outlined OpenMP function, routing through emitCall so
/// the artificial debug location and nounwind handling apply uniformly.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11278 
11279 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11280   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11281     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11282       HasEmittedDeclareTargetRegion = true;
11283 }
11284 
/// Default implementation: the native parameter is usable as-is, so return
/// its local address. TargetParam is unused here; presumably device-specific
/// runtimes override this to translate between the two — confirm in the
/// subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11290 
11291 /// Return allocator value from expression, or return a null allocator (default
11292 /// when no allocator specified).
11293 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11294                                     const Expr *Allocator) {
11295   llvm::Value *AllocVal;
11296   if (Allocator) {
11297     AllocVal = CGF.EmitScalarExpr(Allocator);
11298     // According to the standard, the original allocator type is a enum
11299     // (integer). Convert to pointer type, if required.
11300     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11301                                         CGF.getContext().VoidPtrTy,
11302                                         Allocator->getExprLoc());
11303   } else {
11304     // If no allocator specified, it defaults to the null allocator.
11305     AllocVal = llvm::Constant::getNullValue(
11306         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11307   }
11308   return AllocVal;
11309 }
11310 
11311 /// Return the alignment from an allocate directive if present.
11312 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11313   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11314 
11315   if (!AllocateAlignment)
11316     return nullptr;
11317 
11318   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11319 }
11320 
// Returns the address to use for a local variable, taking untied-task
// storage and `#pragma omp allocate` into account.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If the variable is local to an untied task, its (address, real address)
  // pair was registered on the untied-locals stack for this function.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: size is a runtime value.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round the size up to the declared alignment.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    // __kmpc_aligned_alloc takes the extra alignment argument; otherwise the
    // plain __kmpc_alloc entry point is used.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer in the task-local slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // Emit __kmpc_free(<gtid>, <addr cast to void*>, <allocator>).
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    // Free the allocation on scope exit, on both normal and EH paths.
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11420 
11421 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11422                                              const VarDecl *VD) const {
11423   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11424   if (It == FunctionToUntiedTaskStackMap.end())
11425     return false;
11426   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11427 }
11428 
11429 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11430     CodeGenModule &CGM, const OMPLoopDirective &S)
11431     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11432   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11433   if (!NeedToPush)
11434     return;
11435   NontemporalDeclsSet &DS =
11436       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11437   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11438     for (const Stmt *Ref : C->private_refs()) {
11439       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11440       const ValueDecl *VD;
11441       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11442         VD = DRE->getDecl();
11443       } else {
11444         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11445         assert((ME->isImplicitCXXThis() ||
11446                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11447                "Expected member of current class.");
11448         VD = ME->getMemberDecl();
11449       }
11450       DS.insert(VD);
11451     }
11452   }
11453 }
11454 
11455 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11456   if (!NeedToPush)
11457     return;
11458   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11459 }
11460 
11461 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11462     CodeGenFunction &CGF,
11463     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11464                           std::pair<Address, Address>> &LocalVars)
11465     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11466   if (!NeedToPush)
11467     return;
11468   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11469       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11470   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11471 }
11472 
11473 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11474   if (!NeedToPush)
11475     return;
11476   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11477 }
11478 
11479 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11480   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11481 
11482   return llvm::any_of(
11483       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11484       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11485 }
11486 
11487 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11488     const OMPExecutableDirective &S,
11489     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11490     const {
11491   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11492   // Vars in target/task regions must be excluded completely.
11493   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11494       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11495     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11496     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11497     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11498     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11499       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11500         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11501     }
11502   }
11503   // Exclude vars in private clauses.
11504   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11505     for (const Expr *Ref : C->varlists()) {
11506       if (!Ref->getType()->isScalarType())
11507         continue;
11508       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11509       if (!DRE)
11510         continue;
11511       NeedToCheckForLPCs.insert(DRE->getDecl());
11512     }
11513   }
11514   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11515     for (const Expr *Ref : C->varlists()) {
11516       if (!Ref->getType()->isScalarType())
11517         continue;
11518       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11519       if (!DRE)
11520         continue;
11521       NeedToCheckForLPCs.insert(DRE->getDecl());
11522     }
11523   }
11524   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11525     for (const Expr *Ref : C->varlists()) {
11526       if (!Ref->getType()->isScalarType())
11527         continue;
11528       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11529       if (!DRE)
11530         continue;
11531       NeedToCheckForLPCs.insert(DRE->getDecl());
11532     }
11533   }
11534   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11535     for (const Expr *Ref : C->varlists()) {
11536       if (!Ref->getType()->isScalarType())
11537         continue;
11538       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11539       if (!DRE)
11540         continue;
11541       NeedToCheckForLPCs.insert(DRE->getDecl());
11542     }
11543   }
11544   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11545     for (const Expr *Ref : C->varlists()) {
11546       if (!Ref->getType()->isScalarType())
11547         continue;
11548       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11549       if (!DRE)
11550         continue;
11551       NeedToCheckForLPCs.insert(DRE->getDecl());
11552     }
11553   }
11554   for (const Decl *VD : NeedToCheckForLPCs) {
11555     for (const LastprivateConditionalData &Data :
11556          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11557       if (Data.DeclToUniqueName.count(VD) > 0) {
11558         if (!Data.Disabled)
11559           NeedToAddForLPCsAsDisabled.insert(VD);
11560         break;
11561       }
11562     }
11563   }
11564 }
11565 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only for OpenMP >= 5.0 when the directive carries at least one
      // lastprivate(conditional:) clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Register every variable of the conditional lastprivate clauses, mapping
  // each declaration to a unique name (generated with the "pl_cond" prefix)
  // used for the global that holds its last value.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the owning function; both are
  // consulted when updates are emitted later.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11597 
11598 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11599     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11600     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11601   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11602   if (CGM.getLangOpts().OpenMP < 50)
11603     return;
11604   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11605   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11606   if (!NeedToAddForLPCsAsDisabled.empty()) {
11607     Action = ActionToDo::DisableLastprivateConditional;
11608     LastprivateConditionalData &Data =
11609         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11610     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11611       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11612     Data.Fn = CGF.CurFn;
11613     Data.Disabled = true;
11614   }
11615 }
11616 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory for the constructor that pushes a "disabled" record (or
  // nothing) for directive S, making the intent explicit at call sites.
  return LastprivateConditionalRAII(CGF, S);
}
11622 
11623 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11624   if (CGM.getLangOpts().OpenMP < 50)
11625     return;
11626   if (Action == ActionToDo::DisableLastprivateConditional) {
11627     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11628            "Expected list of disabled private vars.");
11629     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11630   }
11631   if (Action == ActionToDo::PushAsLastprivateConditional) {
11632     assert(
11633         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11634         "Expected list of lastprivate conditional vars.");
11635     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11636   }
11637 }
11638 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache mapping each lastprivate conditional variable to its
  // {record type, value field, Fired field, base lvalue} tuple.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build an implicit record holding the value
    // and a char "Fired" flag, and allocate a private temporary for it.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate", but it
    // is only an internal record tag; renaming would churn emitted IR names.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the record created on a previous call.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset Fired to 0: the variable has not been updated in this region yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  // The value field serves as the private copy of the variable.
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}
11673 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  // Stack of active lastprivate conditional regions (innermost last).
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Filled in by the Visit* methods when a tracked variable is found.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-to-outermost; a disabled region that tracks the decl
    // stops the search with a negative answer.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members accessed through the current class's `this` are tracked.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children, but skip expression children that are not
    // glvalues.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  // Returns {expr, canonical decl, unique name, IV lvalue, owning function}
  // for the reference found by the last successful Visit.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
11744 
// Emits the conditional update of the global "last value" copy of a
// lastprivate conditional variable: if the current iteration is >= the last
// recorded one, both the recorded iteration and the value are refreshed.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Serialize updates from concurrent threads via a named critical region.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11832 
// Inspects LHS of an assignment; if it references a tracked lastprivate
// conditional variable, emits the bookkeeping required to record the update.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct so the
    // Fired flag next to it can be set.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: the flag may be set concurrently from several threads.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11876 
11877 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11878     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11879     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11880   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11881     return;
11882   auto Range = llvm::reverse(LastprivateConditionalStack);
11883   auto It = llvm::find_if(
11884       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11885   if (It == Range.end() || It->Fn != CGF.CurFn)
11886     return;
11887   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11888   assert(LPCI != LastprivateConditionalToTypes.end() &&
11889          "Lastprivates must be registered already.");
11890   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11891   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11892   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11893   for (const auto &Pair : It->DeclToUniqueName) {
11894     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11895     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11896       continue;
11897     auto I = LPCI->getSecond().find(Pair.first);
11898     assert(I != LPCI->getSecond().end() &&
11899            "Lastprivate must be rehistered already.");
11900     // bool Cmp = priv_a.Fired != 0;
11901     LValue BaseLVal = std::get<3>(I->getSecond());
11902     LValue FiredLVal =
11903         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11904     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11905     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11906     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11907     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11908     // if (Cmp) {
11909     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11910     CGF.EmitBlock(ThenBB);
11911     Address Addr = CGF.GetAddrOfLocalVar(VD);
11912     LValue LVal;
11913     if (VD->getType()->isReferenceType())
11914       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11915                                            AlignmentSource::Decl);
11916     else
11917       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11918                                 AlignmentSource::Decl);
11919     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11920                                      D.getBeginLoc());
11921     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11922     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11923     // }
11924   }
11925 }
11926 
// Copies the final "last value" from the internal global (if it was created,
// i.e. the variable was updated somewhere in the region) back into the
// variable's private copy PrivLVal.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  // priv_a = last_a;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11945 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11952 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11959 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11967 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11976 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11983 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11989 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11996 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12001 
// SIMD-only mode supports no runtime calls; reaching this is a frontend bug.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12007 
// 'single' regions (and their copyprivate broadcast) need the runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12015 
// Runtime-backed 'ordered' regions are unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12022 
// Explicit/implicit barriers are runtime calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12030 
// Dynamic worksharing-loop dispatch needs the runtime scheduler; unreachable
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12037 
// Counterpart of emitForDispatchInit; likewise unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12042 
// Static worksharing-loop initialization is a runtime call; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12048 
// 'distribute' static scheduling is a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12054 
// End-of-ordered-iteration signaling is a runtime call; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12061 
// Counterpart of emitForStaticInit; likewise unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12067 
// Fetching the next dynamically-scheduled chunk is a runtime call;
// unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12075 
// 'num_threads' requires the runtime thread manager; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12081 
// 'proc_bind' is communicated via a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12087 
// 'threadprivate' variable access goes through the runtime cache; unreachable
// in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12094 
// Registering a 'threadprivate' definition with the runtime is unreachable in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12100 
// Artificial threadprivate storage is a runtime service; unreachable in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12105 
// 'flush' lowers to a runtime call here; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12112 
// 'task' creation/scheduling requires the tasking runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12121 
// 'taskloop' requires the tasking runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12128 
// Reductions ARE supported in SIMD-only mode, but only the runtime-free
// "simple" form (asserted below); lowering is delegated to the generic
// base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12137 
// Task reductions require the tasking runtime; unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12143 
// Counterpart of emitTaskReductionInit; likewise unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12149 
// Task-reduction fixups require the tasking runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12156 
// Looking up a task-reduction item is a runtime call; unreachable in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12163 
// 'taskwait' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12169 
// 'cancellation point' requires runtime cancellation support; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12175 
// 'cancel' requires runtime cancellation support; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12181 
// Outlining 'target' regions for offloading is not done in SIMD-only mode;
// this hook must never be reached.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12188 
// Launching a 'target' region requires the offloading runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12198 
// Device-side emission of target functions never happens in SIMD-only mode;
// this hook must never be reached.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12202 
// Device-side emission of target global variables never happens in SIMD-only
// mode; this hook must never be reached.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12206 
// SIMD-only mode performs no target-specific handling of globals: always
// return false so the declaration goes through normal (host) codegen.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12210 
// 'teams' launch requires the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12218 
// 'num_teams'/'thread_limit' are communicated via runtime calls; unreachable
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12225 
// 'target data' mapping requires the offloading runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12232 
// Stand-alone data directives ('target enter/exit data', 'target update')
// require the offloading runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12238 
// Doacross-loop initialization is a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12244 
// Doacross synchronization via 'depend' clauses is a runtime call;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12249 
// Doacross synchronization via 'doacross' clauses is a runtime call;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12254 
// Parameter translation is only needed for runtime-outlined regions;
// unreachable in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12260 
// Counterpart of translateParameter; likewise unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12267