xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a CapturedStmt (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without a CapturedStmt (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default,
  /// overridden by regions that support untied tasks.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this OpenMP region (outlined/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that created this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited through a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    // Outlined parallel regions always receive a thread-id parameter.
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that implements part-based switching for untied tasks: each
  /// suspension point stores the id of the next part and returns; resuming
  /// dispatches through a switch on the stored part id.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence emitted at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch on the part id used to dispatch to the resume point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very beginning of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the id of the next part, run
    /// the untied codegen sequence, return, and register the resume block
    /// as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part id equals the number of cases emitted so far.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer)
/// OpenMP region info, when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, provided by the client.
  StringRef HelperName;
};
343 
/// Placeholder codegen callback for regions that only wrap expressions;
/// the region codegen path must never be invoked for them.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo for the construct and temporarily clears
/// lambda/block capture state; everything is restored on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved 'this' capture field of the enclosing lambda, if any.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of the enclosing block literal, if any.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash and clear lambda/block capture state for the duration of the
    // construct so captures resolve through the OpenMP region instead.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the
/// code from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// 'reverse_offload' clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// 'unified_address' clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// 'unified_shared_memory' clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// 'dynamic_allocators' clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device id values understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
573 /// region.
574 class CleanupTy final : public EHScopeStack::Cleanup {
575   PrePostActionTy *Action;
576 
577 public:
578   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
579   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
580     if (!CGF.HaveInsertPoint())
581       return;
582     Action->Exit(CGF);
583   }
584 };
585 
586 } // anonymous namespace
587 
588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
589   CodeGenFunction::RunCleanupsScope Scope(CGF);
590   if (PrePostAction) {
591     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
592     Callback(CodeGen, CGF, *PrePostAction);
593   } else {
594     PrePostActionTy Action;
595     Callback(CodeGen, CGF, Action);
596   }
597 }
598 
599 /// Check if the combiner is a call to UDR combiner and if it is so return the
600 /// UDR decl used for reduction.
601 static const OMPDeclareReductionDecl *
602 getReductionInit(const Expr *ReductionOp) {
603   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
604     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
605       if (const auto *DRE =
606               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
607         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
608           return DRD;
609   return nullptr;
610 }
611 
/// Emit initialization of \p Private using either the user-defined reduction
/// initializer (when \p DRD declares one) or a null constant of type \p Ty.
/// \param DRD User-defined reduction declaration driving the init.
/// \param InitOp Call expression of the UDR initializer; only used when
/// DRD->getInitializer() is non-null.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the value being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the variable behind the first call argument to the private copy
    // and the one behind the second argument to the original variable.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private constant global holding
    // the null value of Ty, then copy it into the private copy according to
    // the evaluation kind of Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
663 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit True when \p Init is a UDR initializer
/// call; false when it is a regular initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null.
/// \param SrcAddr Address of the original array (only used when DRD is set).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
// Emits the lvalue for the shared copy of reduction item \p E, delegating to
// the generic OpenMP shared-lvalue emission in CodeGenFunction.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
// Emits the initialization of an array-typed private reduction copy at
// \p PrivateAddr, either from a 'declare reduction' initializer (\p DRD) or
// from the private variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Prefer the declare-reduction initializer when one is provided, or when
  // the private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
// Computes and records the size of reduction item \p N (bytes and, for
// variably-modified types, element count), and emits the VLA size expressions
// needed to allocate the private copy.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the byte size is known from the type and no element
    // count is recorded.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; the section bounds are inclusive.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole array: byte size comes from the type; derive the element count by
    // exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be (re-)emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851 
// Re-emits the variably-modified private type of item \p N using an
// externally supplied element count \p Size (e.g. when sizes were computed in
// a different function and passed in). No-op for constant-size items, where
// \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size and emit the type so later
  // address computations see the correct bounds.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
870 
// Emits the initial value of the private reduction copy \p N at
// \p PrivateAddr. Initialization is chosen in priority order: the
// declare-reduction initializer, the caller-provided \p DefaultInit callback,
// then the private variable's own non-trivial initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed items are initialized element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit declined; fall back to the variable's own initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
904 
905 bool ReductionCodeGen::needCleanups(unsigned N) {
906   const auto *PrivateVD =
907       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
908   QualType PrivateType = PrivateVD->getType();
909   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
910   return DTorKind != QualType::DK_none;
911 }
912 
913 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
914                                     Address PrivateAddr) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   if (needCleanups(N)) {
920     PrivateAddr = CGF.Builder.CreateElementBitCast(
921         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
922     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
923   }
924 }
925 
// Follows the chain of pointer/reference indirections described by \p BaseTy,
// loading through \p BaseLV at each level, until the element type \p ElTy is
// reached; the final lvalue is recast to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Pointers and references need different load helpers; both peel one
    // level of indirection per iteration.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
945 
// Produces an address of type \p BaseLVType/\p BaseLVAlignment holding
// \p Addr. When \p BaseTy involves pointer/reference indirections before
// reaching \p ElTy, a chain of stack temporaries is built (each storing the
// address of the next) so that loading through the same number of
// indirections from the returned address yields \p Addr.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per level of indirection; link each new temporary into
    // the previous one and remember the outermost (MostTopTmp).
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the payload into the innermost temporary and hand back the
    // outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
973 
974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
975   const VarDecl *OrigVD = nullptr;
976   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
977     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
978     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
979       Base = TempOASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
985     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   }
991   return OrigVD;
992 }
993 
// For array-section/subscript reduction items, adjusts \p PrivateAddr so it
// can be addressed through the original base variable: the offset between the
// base and the start of the shared section is applied to the private copy,
// and the result is cast back to the base lvalue's type. Other items are
// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) from the start of the shared section to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1019 
1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1021   const OMPDeclareReductionDecl *DRD =
1022       getReductionInit(ClausesData[N].ReductionOp);
1023   return DRD && DRD->getInitializer();
1024 }
1025 
// The thread-id variable is captured by pointer in outlined regions; load
// through the pointer to get an lvalue for the thread-id value itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1031 
// Emits the region body inside a terminate scope so that any exception
// escaping the structured block terminates the program instead of unwinding
// out of the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1044 
// In task outlined regions the thread-id variable holds the value directly
// (its type is not a pointer), so no pointer load is required.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1051 
1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1053                                        QualType FieldTy) {
1054   auto *Field = FieldDecl::Create(
1055       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1056       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1057       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1058   Field->setAccess(AS_public);
1059   DC->addDecl(Field);
1060   return Field;
1061 }
1062 
// Builds the implicit ident_t record (used to pass source-location info to
// the OpenMP runtime), the critical-section lock-name type, and initializes
// the OpenMPIRBuilder and offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // ident_t layout: four kmp_int32 fields followed by a char* source string.
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Lock names for critical sections are arrays of 8 i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1090 
1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
// Emits the outlined combiner or initializer function for a 'declare
// reduction' construct:
//   void .omp_combiner.(Ty *omp_out, Ty *omp_in);
// The 'in'/'out' variables of the construct are privatized to the
// dereferenced parameters, then \p CombinerInitializer (the combiner
// expression, or a call-style initializer) is emitted, if provided.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a call-style expression, default-initialize the
  // output ('omp_priv') from its own non-trivial initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
// Emits (at most once per declaration) the combiner and, when present, the
// initializer functions for a 'declare reduction' construct, caching them in
// UDRMap. When \p CGF is provided, the declaration is also recorded against
// the current function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the expression; otherwise the priv
    // variable's own initializer is emitted inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
// Construction pushes a FinalizationInfo for an OMPD_parallel region;
// destruction pops it. A null \p OMPBuilder makes both operations no-ops.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1254 
// Outlines the captured statement \p CS of a parallel/teams region into a
// function named via \p OutlinedHelperName, wiring up the thread-id variable
// and the cancellation state of the directive.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the directive (in any of its parallel-containing
  // flavors) carries a 'cancel' inside.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1291 
1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1293     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1296   return emitParallelOrTeamsOutlinedFunction(
1297       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299 
1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1301     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1302     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1303   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1304   return emitParallelOrTeamsOutlinedFunction(
1305       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1306 }
1307 
// Outlines the captured statement of a task/taskloop region. For untied
// tasks, installs an action that re-schedules the remaining parts via
// __kmpc_omp_task and reports the resulting number of task parts through
// \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback used for untied tasks: re-enqueue the task descriptor so the
  // next part is executed later.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Collect the 'cancel' state from whichever task-like directive this is.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1354 
1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1356                              const RecordDecl *RD, const CGRecordLayout &RL,
1357                              ArrayRef<llvm::Constant *> Data) {
1358   llvm::StructType *StructTy = RL.getLLVMType();
1359   unsigned PrevIdx = 0;
1360   ConstantInitBuilder CIBuilder(CGM);
1361   auto DI = Data.begin();
1362   for (const FieldDecl *FD : RD->fields()) {
1363     unsigned Idx = RL.getLLVMFieldNo(FD);
1364     // Fill the alignment.
1365     for (unsigned I = PrevIdx; I < Idx; ++I)
1366       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1367     PrevIdx = Idx + 1;
1368     Fields.add(*DI);
1369     ++DI;
1370   }
1371 }
1372 
1373 template <class... As>
1374 static llvm::GlobalVariable *
1375 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1376                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1377                    As &&... Args) {
1378   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1379   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1380   ConstantInitBuilder CIBuilder(CGM);
1381   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1382   buildStructValue(Fields, CGM, RD, RL, Data);
1383   return Fields.finishAndCreateGlobal(
1384       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1385       std::forward<As>(Args)...);
1386 }
1387 
1388 template <typename T>
1389 static void
1390 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1391                                          ArrayRef<llvm::Constant *> Data,
1392                                          T &Parent) {
1393   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1394   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1395   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1396   buildStructValue(Fields, CGM, RD, RL, Data);
1397   Fields.finishAndAddTo(Parent);
1398 }
1399 
// Returns (creating and caching on first use) the address of a default
// ident_t global for the given \p Flags, keyed by (Flags, reserved_2 flags).
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1432 
1433 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1434                                              bool AtCurrentPoint) {
1435   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1436   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1437 
1438   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1439   if (AtCurrentPoint) {
1440     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1441         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1442   } else {
1443     Elem.second.ServiceInsertPt =
1444         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1445     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1446   }
1447 }
1448 
1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1450   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1451   if (Elem.second.ServiceInsertPt) {
1452     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1453     Elem.second.ServiceInsertPt = nullptr;
1454     Ptr->eraseFromParent();
1455   }
1456 }
1457 
/// Emit the ident_t* "location" argument for a runtime call at \p Loc. With
/// debug info disabled or an invalid location this is a shared module-level
/// default global; otherwise a per-function ident_t temporary whose psource
/// field is updated to describe \p Loc before each use.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the fresh temporary at the service
    // insertion point so the initialization dominates all uses.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Look up (or build and cache) the ";file;function;line;column;;" string
  // describing Loc. Keyed by the raw source-location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1518 
/// Return the OpenMP global thread id for the current function, either from
/// the per-function cache, from an outlined region's thread-id parameter, or
/// by emitting a call to __kmpc_global_thread_num at the service insertion
/// point (and caching the result).
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reload the thread-id variable when it is safe to do so: no
      // C++-exception landing pads are required, or both the variable's
      // defining instruction and the current insertion point are positioned
      // so the load cannot land in an unreachable-after-throw position.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1576 
1577 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1578   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1579   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1580     clearLocThreadIdInsertPt(CGF);
1581     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1582   }
1583   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1584     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1585       UDRMap.erase(D);
1586     FunctionUDRMap.erase(CGF.CurFn);
1587   }
1588   auto I = FunctionUDMMap.find(CGF.CurFn);
1589   if (I != FunctionUDMMap.end()) {
1590     for(const auto *D : I->second)
1591       UDMMap.erase(D);
1592     FunctionUDMMap.erase(I);
1593   }
1594   LastprivateConditionalToTypes.erase(CGF.CurFn);
1595 }
1596 
1597 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1598   return IdentTy->getPointerTo();
1599 }
1600 
1601 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1602   if (!Kmpc_MicroTy) {
1603     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1604     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1605                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1606     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1607   }
1608   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1616                                             : "__kmpc_for_static_init_4u")
1617                                 : (IVSigned ? "__kmpc_for_static_init_8"
1618                                             : "__kmpc_for_static_init_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     CGM.Int32Ty,                               // schedtype
1625     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1626     PtrTy,                                     // p_lower
1627     PtrTy,                                     // p_upper
1628     PtrTy,                                     // p_stride
1629     ITy,                                       // incr
1630     ITy                                        // chunk
1631   };
1632   auto *FnTy =
1633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1634   return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636 
1637 llvm::FunctionCallee
1638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1639   assert((IVSize == 32 || IVSize == 64) &&
1640          "IV size is not compatible with the omp runtime");
1641   StringRef Name =
1642       IVSize == 32
1643           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1644           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1647                                CGM.Int32Ty,           // tid
1648                                CGM.Int32Ty,           // schedtype
1649                                ITy,                   // lower
1650                                ITy,                   // upper
1651                                ITy,                   // stride
1652                                ITy                    // chunk
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 llvm::FunctionCallee
1660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1661   assert((IVSize == 32 || IVSize == 64) &&
1662          "IV size is not compatible with the omp runtime");
1663   StringRef Name =
1664       IVSize == 32
1665           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1666           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1667   llvm::Type *TypeParams[] = {
1668       getIdentTyPointerTy(), // loc
1669       CGM.Int32Ty,           // tid
1670   };
1671   auto *FnTy =
1672       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1673   return CGM.CreateRuntimeFunction(FnTy, Name);
1674 }
1675 
1676 llvm::FunctionCallee
1677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1678   assert((IVSize == 32 || IVSize == 64) &&
1679          "IV size is not compatible with the omp runtime");
1680   StringRef Name =
1681       IVSize == 32
1682           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1683           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1684   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1685   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1686   llvm::Type *TypeParams[] = {
1687     getIdentTyPointerTy(),                     // loc
1688     CGM.Int32Ty,                               // tid
1689     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1690     PtrTy,                                     // p_lower
1691     PtrTy,                                     // p_upper
1692     PtrTy                                      // p_stride
1693   };
1694   auto *FnTy =
1695       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1696   return CGM.CreateRuntimeFunction(FnTy, Name);
1697 }
1698 
1699 /// Obtain information that uniquely identifies a target entry. This
1700 /// consists of the file and device IDs as well as line number associated with
1701 /// the relevant entry source location.
1702 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1703                                      unsigned &DeviceID, unsigned &FileID,
1704                                      unsigned &LineNum) {
1705   SourceManager &SM = C.getSourceManager();
1706 
1707   // The loc should be always valid and have a file ID (the user cannot use
1708   // #pragma directives in macros)
1709 
1710   assert(Loc.isValid() && "Source location is expected to be always valid.");
1711 
1712   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1713   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1714 
1715   llvm::sys::fs::UniqueID ID;
1716   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1717     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1718         << PLoc.getFilename() << EC.message();
1719 
1720   DeviceID = ID.getDevice();
1721   FileID = ID.getFile();
1722   LineNum = PLoc.getLine();
1723 }
1724 
/// For a declare-target 'link' variable (or a 'to' variable under unified
/// shared memory), return the address of the generated reference pointer
/// "<mangled>[_<fileid>]_decl_tgt_ref_ptr"; otherwise an invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no device runtime interaction is emitted.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables across translation units by
        // appending the unique ID of the defining file.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side gets a static initializer pointing at the variable
      // itself; the pointer is then registered with the offloading machinery.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1763 
1764 llvm::Constant *
1765 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1766   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1767          !CGM.getContext().getTargetInfo().isTLSSupported());
1768   // Lookup the entry, lazily creating it if necessary.
1769   std::string Suffix = getName({"cache", ""});
1770   return getOrCreateInternalVariable(
1771       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1772 }
1773 
1774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1775                                                 const VarDecl *VD,
1776                                                 Address VDAddr,
1777                                                 SourceLocation Loc) {
1778   if (CGM.getLangOpts().OpenMPUseTLS &&
1779       CGM.getContext().getTargetInfo().isTLSSupported())
1780     return VDAddr;
1781 
1782   llvm::Type *VarTy = VDAddr.getElementType();
1783   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1784                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1785                                                        CGM.Int8PtrTy),
1786                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1787                          getOrCreateThreadPrivateCache(VD)};
1788   return Address(CGF.EmitRuntimeCall(
1789                      OMPBuilder.getOrCreateRuntimeFunction(
1790                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1791                      Args),
1792                  VDAddr.getAlignment());
1793 }
1794 
1795 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1796     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1797     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1798   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1799   // library.
1800   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1801   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1802                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1803                       OMPLoc);
1804   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1805   // to register constructor/destructor for variable.
1806   llvm::Value *Args[] = {
1807       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1808       Ctor, CopyCtor, Dtor};
1809   CGF.EmitRuntimeCall(
1810       OMPBuilder.getOrCreateRuntimeFunction(
1811           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1812       Args);
1813 }
1814 
/// Emit the helpers needed to register the threadprivate variable \p VD with
/// the runtime: a ctor that re-emits its initializer into the threadprivate
/// copy (C++, when \p PerformInit), a dtor for destructible types, and the
/// __kmpc_threadprivate_register call. When \p CGF is null a standalone
/// "__omp_threadprivate_init_" function is synthesized and returned so the
/// caller can schedule it as a global initializer; otherwise the registration
/// is emitted into \p CGF and nullptr is returned. Returns nullptr as well
/// when TLS is used directly or no helpers are required.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the helpers at most once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: the void* address of the threadprivate copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the passed-in copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the address it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: the void* address of the threadprivate copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Runtime expects a well-typed null when no ctor is needed.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Runtime expects a well-typed null when no dtor is needed.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No CodeGenFunction supplied: synthesize a standalone init function
      // that performs the registration, and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1934 
/// Emit and register offload entries for the constructor/destructor of the
/// declare-target variable \p VD (named "__omp_offloading_<dev>_<file>_<var>_
/// l<line>_ctor"/"_dtor"). On the device the actual init/destroy code is
/// generated; on the host only placeholder globals are emitted for entry
/// registration. Returns true when compiling for the device (signalling the
/// caller accordingly), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without device compilation or any target triples.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' under unified shared memory) are accessed via
  // the reference pointer, not initialized here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries at most once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a placeholder global is needed for entry
      // registration.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host, only a placeholder global is needed for entry
      // registration.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2049 
/// Return the address of an "artificial" threadprivate variable identified by
/// \p Name with element type \p VarType. With TLS support the backing
/// internal global is simply marked thread_local; otherwise per-thread
/// storage is obtained through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Runtime call arguments: location, thread id, data address, size, and the
  // per-variable cache global ("<name>.artificial..cache.").
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned pointer back to the variable's own pointer type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2080 
2081 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2082                                    const RegionCodeGenTy &ThenGen,
2083                                    const RegionCodeGenTy &ElseGen) {
2084   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2085 
2086   // If the condition constant folds and can be elided, try to avoid emitting
2087   // the condition and the dead arm of the if/else.
2088   bool CondConstant;
2089   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2090     if (CondConstant)
2091       ThenGen(CGF);
2092     else
2093       ElseGen(CGF);
2094     return;
2095   }
2096 
2097   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2098   // emit the conditional branch.
2099   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2100   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2101   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2102   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2103 
2104   // Emit the 'then' code.
2105   CGF.EmitBlock(ThenBlock);
2106   ThenGen(CGF);
2107   CGF.EmitBranch(ContBlock);
2108   // Emit the 'else' code if present.
2109   // There is no need to emit line number for unconditional branch.
2110   (void)ApplyDebugLocation::CreateEmpty(CGF);
2111   CGF.EmitBlock(ElseBlock);
2112   ElseGen(CGF);
2113   // There is no need to emit line number for unconditional branch.
2114   (void)ApplyDebugLocation::CreateEmpty(CGF);
2115   CGF.EmitBranch(ContBlock);
2116   // Emit the continuation block for code after the if.
2117   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2118 }
2119 
/// Emit a call implementing '#pragma omp parallel': either a fork through
/// __kmpc_fork_call or, when the 'if' clause evaluates to false at run
/// time, a serialized execution of the outlined region on the current
/// thread.
/// \param OutlinedFn Outlined function implementing the region body.
/// \param CapturedVars Captured variables forwarded to the outlined function.
/// \param IfCond Condition of the 'if' clause, or nullptr if absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel case: hand the outlined function and the captured variables to
  // the runtime, which forks the team.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized case: bracket a direct call to the outlined function with
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // 'if' clause present: choose between the two versions at run time.
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2181 
2182 // If we're inside an (outlined) parallel region, use the region info's
2183 // thread-ID variable (it is passed in a first argument of the outlined function
2184 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2185 // regular serial code region, get thread ID by calling kmp_int32
2186 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2187 // return the address of that temp.
2188 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2189                                              SourceLocation Loc) {
2190   if (auto *OMPRegionInfo =
2191           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2192     if (OMPRegionInfo->getThreadIDVariable())
2193       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2194 
2195   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2196   QualType Int32Ty =
2197       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2198   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2199   CGF.EmitStoreOfScalar(ThreadID,
2200                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2201 
2202   return ThreadIDTemp;
2203 }
2204 
2205 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2206     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2207   SmallString<256> Buffer;
2208   llvm::raw_svector_ostream Out(Buffer);
2209   Out << Name;
2210   StringRef RuntimeName = Out.str();
2211   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2212   if (Elem.second) {
2213     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2214            "OMP internal variable has different type than requested");
2215     return &*Elem.second;
2216   }
2217 
2218   return Elem.second = new llvm::GlobalVariable(
2219              CGM.getModule(), Ty, /*IsConstant*/ false,
2220              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2221              Elem.first(), /*InsertBefore=*/nullptr,
2222              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2223 }
2224 
2225 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2226   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2227   std::string Name = getName({Prefix, "var"});
2228   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2229 }
2230 
2231 namespace {
2232 /// Common pre(post)-action for different OpenMP constructs.
2233 class CommonActionTy final : public PrePostActionTy {
2234   llvm::FunctionCallee EnterCallee;
2235   ArrayRef<llvm::Value *> EnterArgs;
2236   llvm::FunctionCallee ExitCallee;
2237   ArrayRef<llvm::Value *> ExitArgs;
2238   bool Conditional;
2239   llvm::BasicBlock *ContBlock = nullptr;
2240 
2241 public:
2242   CommonActionTy(llvm::FunctionCallee EnterCallee,
2243                  ArrayRef<llvm::Value *> EnterArgs,
2244                  llvm::FunctionCallee ExitCallee,
2245                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2246       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2247         ExitArgs(ExitArgs), Conditional(Conditional) {}
2248   void Enter(CodeGenFunction &CGF) override {
2249     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2250     if (Conditional) {
2251       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2252       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2253       ContBlock = CGF.createBasicBlock("omp_if.end");
2254       // Generate the branch (If-stmt)
2255       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2256       CGF.EmitBlock(ThenBlock);
2257     }
2258   }
2259   void Done(CodeGenFunction &CGF) {
2260     // Emit the rest of blocks/branches
2261     CGF.EmitBranch(ContBlock);
2262     CGF.EmitBlock(ContBlock, true);
2263   }
2264   void Exit(CodeGenFunction &CGF) override {
2265     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2266   }
2267 };
2268 } // anonymous namespace
2269 
2270 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2271                                          StringRef CriticalName,
2272                                          const RegionCodeGenTy &CriticalOpGen,
2273                                          SourceLocation Loc, const Expr *Hint) {
2274   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2275   // CriticalOpGen();
2276   // __kmpc_end_critical(ident_t *, gtid, Lock);
2277   // Prepare arguments and build a call to __kmpc_critical
2278   if (!CGF.HaveInsertPoint())
2279     return;
2280   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2281                          getCriticalRegionLock(CriticalName)};
2282   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2283                                                 std::end(Args));
2284   if (Hint) {
2285     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2286         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2287   }
2288   CommonActionTy Action(
2289       OMPBuilder.getOrCreateRuntimeFunction(
2290           CGM.getModule(),
2291           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2292       EnterArgs,
2293       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2294                                             OMPRTL___kmpc_end_critical),
2295       Args);
2296   CriticalOpGen.setAction(Action);
2297   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2298 }
2299 
2300 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2301                                        const RegionCodeGenTy &MasterOpGen,
2302                                        SourceLocation Loc) {
2303   if (!CGF.HaveInsertPoint())
2304     return;
2305   // if(__kmpc_master(ident_t *, gtid)) {
2306   //   MasterOpGen();
2307   //   __kmpc_end_master(ident_t *, gtid);
2308   // }
2309   // Prepare arguments and build a call to __kmpc_master
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_master),
2313                         Args,
2314                         OMPBuilder.getOrCreateRuntimeFunction(
2315                             CGM.getModule(), OMPRTL___kmpc_end_master),
2316                         Args,
2317                         /*Conditional=*/true);
2318   MasterOpGen.setAction(Action);
2319   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2320   Action.Done(CGF);
2321 }
2322 
2323 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2324                                         SourceLocation Loc) {
2325   if (!CGF.HaveInsertPoint())
2326     return;
2327   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2328     OMPBuilder.CreateTaskyield(CGF.Builder);
2329   } else {
2330     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2331     llvm::Value *Args[] = {
2332         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2333         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2334     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2335                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2336                         Args);
2337   }
2338 
2339   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2340     Region->emitUntiedSwitch(CGF);
2341 }
2342 
2343 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2344                                           const RegionCodeGenTy &TaskgroupOpGen,
2345                                           SourceLocation Loc) {
2346   if (!CGF.HaveInsertPoint())
2347     return;
2348   // __kmpc_taskgroup(ident_t *, gtid);
2349   // TaskgroupOpGen();
2350   // __kmpc_end_taskgroup(ident_t *, gtid);
2351   // Prepare arguments and build a call to __kmpc_taskgroup
2352   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2353   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2354                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2355                         Args,
2356                         OMPBuilder.getOrCreateRuntimeFunction(
2357                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2358                         Args);
2359   TaskgroupOpGen.setAction(Action);
2360   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2361 }
2362 
2363 /// Given an array of pointers to variables, project the address of a
2364 /// given variable.
2365 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2366                                       unsigned Index, const VarDecl *Var) {
2367   // Pull out the pointer to the variable.
2368   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2369   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2370 
2371   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2372   Addr = CGF.Builder.CreateElementBitCast(
2373       Addr, CGF.ConvertTypeForMem(Var->getType()));
2374   return Addr;
2375 }
2376 
/// Emit the helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of void* (cast to \p ArgsType) holding the
/// destination and source pointers for each 'copyprivate' variable;
/// element I is copied by emitting the precomputed assignment expression
/// AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Copy element I using the caller-provided assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2430 
/// Emit a 'single' region. The region body is guarded by __kmpc_single /
/// __kmpc_end_single; when 'copyprivate' clauses are present, a did_it flag
/// records which thread ran the body and __kmpc_copyprivate broadcasts the
/// listed variables to the other threads via an emitted copy function.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: one src/dst/assignment per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Only needed when copyprivate data must be broadcast afterwards.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Mark this thread as the one that executed the single region (still
    // inside the conditional block opened by the action).
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2518 
2519 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2520                                         const RegionCodeGenTy &OrderedOpGen,
2521                                         SourceLocation Loc, bool IsThreads) {
2522   if (!CGF.HaveInsertPoint())
2523     return;
2524   // __kmpc_ordered(ident_t *, gtid);
2525   // OrderedOpGen();
2526   // __kmpc_end_ordered(ident_t *, gtid);
2527   // Prepare arguments and build a call to __kmpc_ordered
2528   if (IsThreads) {
2529     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2530     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2531                               CGM.getModule(), OMPRTL___kmpc_ordered),
2532                           Args,
2533                           OMPBuilder.getOrCreateRuntimeFunction(
2534                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2535                           Args);
2536     OrderedOpGen.setAction(Action);
2537     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2538     return;
2539   }
2540   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2541 }
2542 
2543 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2544   unsigned Flags;
2545   if (Kind == OMPD_for)
2546     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2547   else if (Kind == OMPD_sections)
2548     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2549   else if (Kind == OMPD_single)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2551   else if (Kind == OMPD_barrier)
2552     Flags = OMP_IDENT_BARRIER_EXPL;
2553   else
2554     Flags = OMP_IDENT_BARRIER_IMPL;
2555   return Flags;
2556 }
2557 
2558 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2559     CodeGenFunction &CGF, const OMPLoopDirective &S,
2560     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2561   // Check if the loop directive is actually a doacross loop directive. In this
2562   // case choose static, 1 schedule.
2563   if (llvm::any_of(
2564           S.getClausesOfKind<OMPOrderedClause>(),
2565           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2566     ScheduleKind = OMPC_SCHEDULE_static;
2567     // Chunk size is 1 in this case.
2568     llvm::APInt ChunkSize(32, 1);
2569     ChunkExpr = IntegerLiteral::Create(
2570         CGF.getContext(), ChunkSize,
2571         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2572         SourceLocation());
2573   }
2574 }
2575 
/// Emit a barrier for directive \p Kind at \p Loc. When the OpenMPIRBuilder
/// is enabled, the barrier is delegated to it. Otherwise this emits
/// __kmpc_barrier, or — inside a cancellable region when \p ForceSimpleCall
/// is false — __kmpc_cancel_barrier, optionally followed (\p EmitChecks) by
/// a check that exits the construct if cancellation was observed.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The barrier kind is encoded into the ident_t flags.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellable region: the cancel-aware barrier reports whether the
      // construct was cancelled.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through any pending cleanups to the construct's cancel
        // destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2625 
2626 /// Map the OpenMP loop schedule to the runtime enumeration.
2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2628                                           bool Chunked, bool Ordered) {
2629   switch (ScheduleKind) {
2630   case OMPC_SCHEDULE_static:
2631     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2632                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2633   case OMPC_SCHEDULE_dynamic:
2634     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2635   case OMPC_SCHEDULE_guided:
2636     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2637   case OMPC_SCHEDULE_runtime:
2638     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2639   case OMPC_SCHEDULE_auto:
2640     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2641   case OMPC_SCHEDULE_unknown:
2642     assert(!Chunked && "chunk was specified but schedule kind not known");
2643     return Ordered ? OMP_ord_static : OMP_sch_static;
2644   }
2645   llvm_unreachable("Unexpected runtime schedule");
2646 }
2647 
2648 /// Map the OpenMP distribute schedule to the runtime enumeration.
2649 static OpenMPSchedType
2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2651   // only static is allowed for dist_schedule
2652   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                          bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticNonchunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2669                                       bool Chunked) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2672   return Schedule == OMP_sch_static_chunked;
2673 }
2674 
2675 bool CGOpenMPRuntime::isStaticChunked(
2676     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2677   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2678   return Schedule == OMP_dist_sch_static_chunked;
2679 }
2680 
2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2682   OpenMPSchedType Schedule =
2683       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2684   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2685   return Schedule != OMP_sch_static;
2686 }
2687 
2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2689                                   OpenMPScheduleClauseModifier M1,
2690                                   OpenMPScheduleClauseModifier M2) {
2691   int Modifier = 0;
2692   switch (M1) {
2693   case OMPC_SCHEDULE_MODIFIER_monotonic:
2694     Modifier = OMP_sch_modifier_monotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2697     Modifier = OMP_sch_modifier_nonmonotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_simd:
2700     if (Schedule == OMP_sch_static_chunked)
2701       Schedule = OMP_sch_static_balanced_chunked;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_last:
2704   case OMPC_SCHEDULE_MODIFIER_unknown:
2705     break;
2706   }
2707   switch (M2) {
2708   case OMPC_SCHEDULE_MODIFIER_monotonic:
2709     Modifier = OMP_sch_modifier_monotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2712     Modifier = OMP_sch_modifier_nonmonotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_simd:
2715     if (Schedule == OMP_sch_static_chunked)
2716       Schedule = OMP_sch_static_balanced_chunked;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_last:
2719   case OMPC_SCHEDULE_MODIFIER_unknown:
2720     break;
2721   }
2722   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2723   // If the static schedule kind is specified or if the ordered clause is
2724   // specified, and if the nonmonotonic modifier is not specified, the effect is
2725   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2726   // modifier is specified, the effect is as if the nonmonotonic modifier is
2727   // specified.
2728   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2729     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2730           Schedule == OMP_sch_static_balanced_chunked ||
2731           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2732           Schedule == OMP_dist_sch_static_chunked ||
2733           Schedule == OMP_dist_sch_static))
2734       Modifier = OMP_sch_modifier_nonmonotonic;
2735   }
2736   return Schedule | Modifier;
2737 }
2738 
2739 void CGOpenMPRuntime::emitForDispatchInit(
2740     CodeGenFunction &CGF, SourceLocation Loc,
2741     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2742     bool Ordered, const DispatchRTInput &DispatchValues) {
2743   if (!CGF.HaveInsertPoint())
2744     return;
2745   OpenMPSchedType Schedule = getRuntimeSchedule(
2746       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2747   assert(Ordered ||
2748          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2749           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2750           Schedule != OMP_sch_static_balanced_chunked));
2751   // Call __kmpc_dispatch_init(
2752   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2753   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2754   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2755 
2756   // If the Chunk was not specified in the clause - use default value 1.
2757   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2758                                             : CGF.Builder.getIntN(IVSize, 1);
2759   llvm::Value *Args[] = {
2760       emitUpdateLocation(CGF, Loc),
2761       getThreadID(CGF, Loc),
2762       CGF.Builder.getInt32(addMonoNonMonoModifier(
2763           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2764       DispatchValues.LB,                                     // Lower
2765       DispatchValues.UB,                                     // Upper
2766       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2767       Chunk                                                  // Chunk
2768   };
2769   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2770 }
2771 
2772 static void emitForStaticInitCall(
2773     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2774     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2775     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2776     const CGOpenMPRuntime::StaticRTInput &Values) {
2777   if (!CGF.HaveInsertPoint())
2778     return;
2779 
2780   assert(!Values.Ordered);
2781   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2782          Schedule == OMP_sch_static_balanced_chunked ||
2783          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2784          Schedule == OMP_dist_sch_static ||
2785          Schedule == OMP_dist_sch_static_chunked);
2786 
2787   // Call __kmpc_for_static_init(
2788   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2789   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2790   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2791   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2792   llvm::Value *Chunk = Values.Chunk;
2793   if (Chunk == nullptr) {
2794     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2795             Schedule == OMP_dist_sch_static) &&
2796            "expected static non-chunked schedule");
2797     // If the Chunk was not specified in the clause - use default value 1.
2798     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2799   } else {
2800     assert((Schedule == OMP_sch_static_chunked ||
2801             Schedule == OMP_sch_static_balanced_chunked ||
2802             Schedule == OMP_ord_static_chunked ||
2803             Schedule == OMP_dist_sch_static_chunked) &&
2804            "expected static chunked schedule");
2805   }
2806   llvm::Value *Args[] = {
2807       UpdateLocation,
2808       ThreadId,
2809       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2810                                                   M2)), // Schedule type
2811       Values.IL.getPointer(),                           // &isLastIter
2812       Values.LB.getPointer(),                           // &LB
2813       Values.UB.getPointer(),                           // &UB
2814       Values.ST.getPointer(),                           // &Stride
2815       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2816       Chunk                                             // Chunk
2817   };
2818   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2819 }
2820 
2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2822                                         SourceLocation Loc,
2823                                         OpenMPDirectiveKind DKind,
2824                                         const OpenMPScheduleTy &ScheduleKind,
2825                                         const StaticRTInput &Values) {
2826   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2827       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2828   assert(isOpenMPWorksharingDirective(DKind) &&
2829          "Expected loop-based or sections-based directive.");
2830   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2831                                              isOpenMPLoopDirective(DKind)
2832                                                  ? OMP_IDENT_WORK_LOOP
2833                                                  : OMP_IDENT_WORK_SECTIONS);
2834   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2835   llvm::FunctionCallee StaticInitFunction =
2836       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2839                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2840 }
2841 
2842 void CGOpenMPRuntime::emitDistributeStaticInit(
2843     CodeGenFunction &CGF, SourceLocation Loc,
2844     OpenMPDistScheduleClauseKind SchedKind,
2845     const CGOpenMPRuntime::StaticRTInput &Values) {
2846   OpenMPSchedType ScheduleNum =
2847       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2848   llvm::Value *UpdatedLocation =
2849       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2850   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2851   llvm::FunctionCallee StaticInitFunction =
2852       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2854                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2855                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2856 }
2857 
2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2859                                           SourceLocation Loc,
2860                                           OpenMPDirectiveKind DKind) {
2861   if (!CGF.HaveInsertPoint())
2862     return;
2863   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2864   llvm::Value *Args[] = {
2865       emitUpdateLocation(CGF, Loc,
2866                          isOpenMPDistributeDirective(DKind)
2867                              ? OMP_IDENT_WORK_DISTRIBUTE
2868                              : isOpenMPLoopDirective(DKind)
2869                                    ? OMP_IDENT_WORK_LOOP
2870                                    : OMP_IDENT_WORK_SECTIONS),
2871       getThreadID(CGF, Loc)};
2872   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2873   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2874                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2875                       Args);
2876 }
2877 
2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2879                                                  SourceLocation Loc,
2880                                                  unsigned IVSize,
2881                                                  bool IVSigned) {
2882   if (!CGF.HaveInsertPoint())
2883     return;
2884   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2885   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2886   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2887 }
2888 
2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2890                                           SourceLocation Loc, unsigned IVSize,
2891                                           bool IVSigned, Address IL,
2892                                           Address LB, Address UB,
2893                                           Address ST) {
2894   // Call __kmpc_dispatch_next(
2895   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2896   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2897   //          kmp_int[32|64] *p_stride);
2898   llvm::Value *Args[] = {
2899       emitUpdateLocation(CGF, Loc),
2900       getThreadID(CGF, Loc),
2901       IL.getPointer(), // &isLastIter
2902       LB.getPointer(), // &Lower
2903       UB.getPointer(), // &Upper
2904       ST.getPointer()  // &Stride
2905   };
2906   llvm::Value *Call =
2907       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2908   return CGF.EmitScalarConversion(
2909       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2910       CGF.getContext().BoolTy, Loc);
2911 }
2912 
2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2914                                            llvm::Value *NumThreads,
2915                                            SourceLocation Loc) {
2916   if (!CGF.HaveInsertPoint())
2917     return;
2918   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2919   llvm::Value *Args[] = {
2920       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2921       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2922   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2924                       Args);
2925 }
2926 
2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2928                                          ProcBindKind ProcBind,
2929                                          SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2933   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2937   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2943                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2944   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2945     OMPBuilder.CreateFlush(CGF.Builder);
2946   } else {
2947     if (!CGF.HaveInsertPoint())
2948       return;
2949     // Build call void __kmpc_flush(ident_t *loc)
2950     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2951                             CGM.getModule(), OMPRTL___kmpc_flush),
2952                         emitUpdateLocation(CGF, Loc));
2953   }
2954 }
2955 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order appears to mirror the runtime's
/// kmp_task_t field layout, since these are used as field indices — keep the
/// order in sync with the record type built for it.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2981 
2982 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2983   return OffloadEntriesTargetRegion.empty() &&
2984          OffloadEntriesDeviceGlobalVar.empty();
2985 }
2986 
2987 /// Initialize target region entry.
2988 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2989     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2990                                     StringRef ParentName, unsigned LineNum,
2991                                     unsigned Order) {
2992   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2993                                              "only required for the device "
2994                                              "code generation.");
2995   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2996       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2997                                    OMPTargetRegionEntryTargetRegion);
2998   ++OffloadingEntriesNum;
2999 }
3000 
3001 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3002     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3003                                   StringRef ParentName, unsigned LineNum,
3004                                   llvm::Constant *Addr, llvm::Constant *ID,
3005                                   OMPTargetRegionEntryKind Flags) {
3006   // If we are emitting code for a target, the entry is already initialized,
3007   // only has to be registered.
3008   if (CGM.getLangOpts().OpenMPIsDevice) {
3009     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3010       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3011           DiagnosticsEngine::Error,
3012           "Unable to find target region on line '%0' in the device code.");
3013       CGM.getDiags().Report(DiagID) << LineNum;
3014       return;
3015     }
3016     auto &Entry =
3017         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3018     assert(Entry.isValid() && "Entry not initialized!");
3019     Entry.setAddress(Addr);
3020     Entry.setID(ID);
3021     Entry.setFlags(Flags);
3022   } else {
3023     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3024     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3025     ++OffloadingEntriesNum;
3026   }
3027 }
3028 
3029 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3030     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3031     unsigned LineNum) const {
3032   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3033   if (PerDevice == OffloadEntriesTargetRegion.end())
3034     return false;
3035   auto PerFile = PerDevice->second.find(FileID);
3036   if (PerFile == PerDevice->second.end())
3037     return false;
3038   auto PerParentName = PerFile->second.find(ParentName);
3039   if (PerParentName == PerFile->second.end())
3040     return false;
3041   auto PerLine = PerParentName->second.find(LineNum);
3042   if (PerLine == PerParentName->second.end())
3043     return false;
3044   // Fail if this entry is already registered.
3045   if (PerLine->second.getAddress() || PerLine->second.getID())
3046     return false;
3047   return true;
3048 }
3049 
3050 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3051     const OffloadTargetRegionEntryInfoActTy &Action) {
3052   // Scan all target region entries and perform the provided action.
3053   for (const auto &D : OffloadEntriesTargetRegion)
3054     for (const auto &F : D.second)
3055       for (const auto &P : F.second)
3056         for (const auto &L : P.second)
3057           Action(D.first, F.first, P.first(), L.first, L.second);
3058 }
3059 
3060 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3061     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3062                                        OMPTargetGlobalVarEntryKind Flags,
3063                                        unsigned Order) {
3064   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3065                                              "only required for the device "
3066                                              "code generation.");
3067   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3068   ++OffloadingEntriesNum;
3069 }
3070 
/// Register a declare-target global variable entry, filling in its address,
/// size and linkage. On the device the entry was pre-initialized from the
/// host IR metadata; on the host a fresh entry is created if none exists.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device: the entry must already exist (created during metadata load).
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already registered; only fill in size/linkage if they were
      // left unset by an earlier declaration-only registration.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Host: an existing entry may still lack a size (declaration seen
      // before the definition) - update it in place.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // Host: first time this variable is seen - create a complete entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3110 
3111 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3112     actOnDeviceGlobalVarEntriesInfo(
3113         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3114   // Scan all target region entries and perform the provided action.
3115   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3116     Action(E.getKey(), E.getValue());
3117 }
3118 
3119 void CGOpenMPRuntime::createOffloadEntry(
3120     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3121     llvm::GlobalValue::LinkageTypes Linkage) {
3122   StringRef Name = Addr->getName();
3123   llvm::Module &M = CGM.getModule();
3124   llvm::LLVMContext &C = M.getContext();
3125 
3126   // Create constant string with the name.
3127   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3128 
3129   std::string StringName = getName({"omp_offloading", "entry_name"});
3130   auto *Str = new llvm::GlobalVariable(
3131       M, StrPtrInit->getType(), /*isConstant=*/true,
3132       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3133   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3134 
3135   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3136                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3137                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3138                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3139                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3140   std::string EntryName = getName({"omp_offloading", "entry", ""});
3141   llvm::GlobalVariable *Entry = createGlobalStruct(
3142       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3143       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3144 
3145   // The entry has to be created in the section the linker expects it to be.
3146   Entry->setSection("omp_offloading_entries");
3147 }
3148 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order so the device side sees them in
  // the same order the host created them.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // (DeviceID, FileID) pair against the source manager's known files.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual __tgt_offload_entry globals, in creation order,
  // diagnosing entries that were never completed.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are not needed on the device under unified shared
        // memory - the host copy is used directly.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3322 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only the device compilation consumes host-produced metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read out of it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read an operand as an integer constant or a string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout matches the
    // emitter in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3391 
3392 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3393   if (!KmpRoutineEntryPtrTy) {
3394     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3395     ASTContext &C = CGM.getContext();
3396     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3397     FunctionProtoType::ExtProtoInfo EPI;
3398     KmpRoutineEntryPtrQTy = C.getPointerType(
3399         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3400     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3401   }
3402 }
3403 
3404 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3405   // Make sure the type of the entry is already created. This is the type we
3406   // have to create:
3407   // struct __tgt_offload_entry{
3408   //   void      *addr;       // Pointer to the offload entry info.
3409   //                          // (function or global)
3410   //   char      *name;       // Name of the function or global.
3411   //   size_t     size;       // Size of the entry info (0 if it a function).
3412   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3413   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3414   // };
3415   if (TgtOffloadEntryQTy.isNull()) {
3416     ASTContext &C = CGM.getContext();
3417     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3418     RD->startDefinition();
3419     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3420     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3421     addFieldToRecordDecl(C, RD, C.getSizeType());
3422     addFieldToRecordDecl(
3423         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3424     addFieldToRecordDecl(
3425         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3426     RD->completeDefinition();
3427     RD->addAttr(PackedAttr::CreateImplicit(C));
3428     TgtOffloadEntryQTy = C.getRecordType(RD);
3429   }
3430   return TgtOffloadEntryQTy;
3431 }
3432 
3433 namespace {
3434 struct PrivateHelpersTy {
3435   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3436                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3437       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3438         PrivateElemInit(PrivateElemInit) {}
3439   const Expr *OriginalRef = nullptr;
3440   const VarDecl *Original = nullptr;
3441   const VarDecl *PrivateCopy = nullptr;
3442   const VarDecl *PrivateElemInit = nullptr;
3443 };
3444 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3445 } // anonymous namespace
3446 
3447 static RecordDecl *
3448 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3449   if (!Privates.empty()) {
3450     ASTContext &C = CGM.getContext();
3451     // Build struct .kmp_privates_t. {
3452     //         /*  private vars  */
3453     //       };
3454     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3455     RD->startDefinition();
3456     for (const auto &Pair : Privates) {
3457       const VarDecl *VD = Pair.second.Original;
3458       QualType Type = VD->getType().getNonReferenceType();
3459       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3460       if (VD->hasAttrs()) {
3461         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3462              E(VD->getAttrs().end());
3463              I != E; ++I)
3464           FD->addAttr(*I);
3465       }
3466     }
3467     RD->completeDefinition();
3468     return RD;
3469   }
3470   return nullptr;
3471 }
3472 
3473 static RecordDecl *
3474 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3475                          QualType KmpInt32Ty,
3476                          QualType KmpRoutineEntryPointerQTy) {
3477   ASTContext &C = CGM.getContext();
3478   // Build struct kmp_task_t {
3479   //         void *              shareds;
3480   //         kmp_routine_entry_t routine;
3481   //         kmp_int32           part_id;
3482   //         kmp_cmplrdata_t data1;
3483   //         kmp_cmplrdata_t data2;
3484   // For taskloops additional fields:
3485   //         kmp_uint64          lb;
3486   //         kmp_uint64          ub;
3487   //         kmp_int64           st;
3488   //         kmp_int32           liter;
3489   //         void *              reductions;
3490   //       };
3491   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3492   UD->startDefinition();
3493   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3494   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3495   UD->completeDefinition();
3496   QualType KmpCmplrdataTy = C.getRecordType(UD);
3497   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3498   RD->startDefinition();
3499   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3500   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3501   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3502   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3503   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3504   if (isOpenMPTaskLoopDirective(Kind)) {
3505     QualType KmpUInt64Ty =
3506         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3507     QualType KmpInt64Ty =
3508         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3509     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3510     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3511     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3512     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3513     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3514   }
3515   RD->completeDefinition();
3516   return RD;
3517 }
3518 
3519 static RecordDecl *
3520 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3521                                      ArrayRef<PrivateDataTy> Privates) {
3522   ASTContext &C = CGM.getContext();
3523   // Build struct kmp_task_t_with_privates {
3524   //         kmp_task_t task_data;
3525   //         .kmp_privates_t. privates;
3526   //       };
3527   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3528   RD->startDefinition();
3529   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3530   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3531     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// The returned function has internal linkage and is registered with the
/// runtime as the task's entry point; it unpacks the fields of the task
/// record and forwards them to the real outlined \p TaskFunction.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates record; Base points
  // at its first field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the task function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the task function
  // expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when the task has privatized
  // variables; otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions loaded
    // from the corresponding kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  // Call the real outlined function, then return 0 to the runtime.
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3650 
/// Emit the task destructor function
///   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt)
/// which runs the destructors of every field of the task's privates record
/// whose type requires non-trivial destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: global thread id and a pointer to the task record.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Load the task record pointer and step to its privates record (the
  // second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each private field that needs destruction;
  // the cleanups fire when FinishFunction pops the function scope.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3699 
3700 /// Emit a privates mapping function for correct handling of private and
3701 /// firstprivate variables.
3702 /// \code
3703 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3704 /// **noalias priv1,...,  <tyn> **noalias privn) {
3705 ///   *priv1 = &.privates.priv1;
3706 ///   ...;
3707 ///   *privn = &.privates.privn;
3708 /// }
3709 /// \endcode
3710 static llvm::Value *
3711 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3712                                ArrayRef<const Expr *> PrivateVars,
3713                                ArrayRef<const Expr *> FirstprivateVars,
3714                                ArrayRef<const Expr *> LastprivateVars,
3715                                QualType PrivatesQTy,
3716                                ArrayRef<PrivateDataTy> Privates) {
3717   ASTContext &C = CGM.getContext();
3718   FunctionArgList Args;
3719   ImplicitParamDecl TaskPrivatesArg(
3720       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3721       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3722       ImplicitParamDecl::Other);
3723   Args.push_back(&TaskPrivatesArg);
3724   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3725   unsigned Counter = 1;
3726   for (const Expr *E : PrivateVars) {
3727     Args.push_back(ImplicitParamDecl::Create(
3728         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3729         C.getPointerType(C.getPointerType(E->getType()))
3730             .withConst()
3731             .withRestrict(),
3732         ImplicitParamDecl::Other));
3733     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3734     PrivateVarsPos[VD] = Counter;
3735     ++Counter;
3736   }
3737   for (const Expr *E : FirstprivateVars) {
3738     Args.push_back(ImplicitParamDecl::Create(
3739         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3740         C.getPointerType(C.getPointerType(E->getType()))
3741             .withConst()
3742             .withRestrict(),
3743         ImplicitParamDecl::Other));
3744     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3745     PrivateVarsPos[VD] = Counter;
3746     ++Counter;
3747   }
3748   for (const Expr *E : LastprivateVars) {
3749     Args.push_back(ImplicitParamDecl::Create(
3750         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3751         C.getPointerType(C.getPointerType(E->getType()))
3752             .withConst()
3753             .withRestrict(),
3754         ImplicitParamDecl::Other));
3755     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3756     PrivateVarsPos[VD] = Counter;
3757     ++Counter;
3758   }
3759   const auto &TaskPrivatesMapFnInfo =
3760       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3761   llvm::FunctionType *TaskPrivatesMapTy =
3762       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3763   std::string Name =
3764       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3765   auto *TaskPrivatesMap = llvm::Function::Create(
3766       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3767       &CGM.getModule());
3768   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3769                                     TaskPrivatesMapFnInfo);
3770   if (CGM.getLangOpts().Optimize) {
3771     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3772     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3773     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3774   }
3775   CodeGenFunction CGF(CGM);
3776   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3777                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3778 
3779   // *privi = &.privates.privi;
3780   LValue Base = CGF.EmitLoadOfPointerLValue(
3781       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3782       TaskPrivatesArg.getType()->castAs<PointerType>());
3783   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3784   Counter = 0;
3785   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3786     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3787     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3788     LValue RefLVal =
3789         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3790     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3791         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3792     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3793     ++Counter;
3794   }
3795   CGF.FinishFunction();
3796   return TaskPrivatesMap;
3797 }
3798 
/// Emit initialization for private variables in task-based directives.
/// Walks \p Privates in parallel with the fields of the privates record
/// reached through \p TDBase and emits the initializer of each private copy.
/// \param KmpTaskSharedsPtr Address of the shareds block used as the source
///        for firstprivate initialization (may be invalid when unused).
/// \param ForDup true when emitting the body of the task_dup function for
///        taskloops (only non-trivial constructor inits are emitted then).
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Iterate the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor inits must be
    // re-emitted; everything else was already handled.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: locate the lvalue of the original (source) variable.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function read the source from the shareds block,
          // re-aligned to the original variable's declared alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted in the current context.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the source element, then
          // emit the copy-initialization expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just emit the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3915 
3916 /// Check if duplication function is required for taskloops.
3917 static bool checkInitIsRequired(CodeGenFunction &CGF,
3918                                 ArrayRef<PrivateDataTy> Privates) {
3919   bool InitRequired = false;
3920   for (const PrivateDataTy &Pair : Privates) {
3921     const VarDecl *VD = Pair.second.PrivateCopy;
3922     const Expr *Init = VD->getAnyInitializer();
3923     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3924                                     !CGF.isTrivialInitializer(Init));
3925     if (InitRequired)
3926       break;
3927   }
3928   return InitRequired;
3929 }
3930 
3931 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // TDBase addresses the destination task record.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the SOURCE task's shareds block; note
    // this inner TDBase intentionally shadows the destination one above.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4010 
4011 /// Checks if destructor function is required to be generated.
4012 /// \return true if cleanups are required, false otherwise.
4013 static bool
4014 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4015   bool NeedsCleanup = false;
4016   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4017   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4018   for (const FieldDecl *FD : PrivateRD->fields()) {
4019     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4020     if (NeedsCleanup)
4021       break;
4022   }
4023   return NeedsCleanup;
4024 }
4025 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// On construction it privatizes the iterator variables and their counters
/// and emits the loop headers (zero-init of each counter, the "iter.cont"
/// condition check, and the branch into "iter.body"); the caller then emits
/// the loop body. On destruction it emits the matching loop tails (counter
/// increment, back-branch to "iter.cont", and the "iter.exit" block) in
/// reverse nesting order. A null iterator expression makes both ctor and
/// dtor no-ops, so the scope can be used unconditionally.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the generated loops; null means "no loops".
  const OMPIteratorExpr *E = nullptr;
  // Continuation ("iter.cont") and exit ("iter.exit") destinations, one pair
  // per iterator, indexed in declaration order; consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatizes the iterator/counter variables and opens one loop per
  /// iterator declared in \p E (emits header blocks only; the body is
  /// whatever the caller emits while this scope is alive).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound once, before the loops are opened.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed vs unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4104 
4105 static std::pair<llvm::Value *, llvm::Value *>
4106 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4107   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4108   llvm::Value *Addr;
4109   if (OASE) {
4110     const Expr *Base = OASE->getBase();
4111     Addr = CGF.EmitScalarExpr(Base);
4112   } else {
4113     Addr = CGF.EmitLValue(E).getPointer(CGF);
4114   }
4115   llvm::Value *SizeVal;
4116   QualType Ty = E->getType();
4117   if (OASE) {
4118     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4119     for (const Expr *SE : OASE->getDimensions()) {
4120       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4121       Sz = CGF.EmitScalarConversion(
4122           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4123       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4124     }
4125   } else if (const auto *ASE =
4126                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4127     LValue UpAddrLVal =
4128         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4129     llvm::Value *UpAddr =
4130         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4131     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4132     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4133     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4134   } else {
4135     SizeVal = CGF.getTypeSize(Ty);
4136   }
4137   return std::make_pair(Addr, SizeVal);
4138 }
4139 
4140 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4141 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4142   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4143   if (KmpTaskAffinityInfoTy.isNull()) {
4144     RecordDecl *KmpAffinityInfoRD =
4145         C.buildImplicitRecord("kmp_task_affinity_info_t");
4146     KmpAffinityInfoRD->startDefinition();
4147     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4148     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4149     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4150     KmpAffinityInfoRD->completeDefinition();
4151     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4152   }
4153 }
4154 
/// Allocates and initializes the kmp_task_t object for a task-based
/// directive \p D: collects and sorts the private copies, builds the
/// task-specific kmp_task_t-with-privates type, emits the proxy task entry,
/// calls __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc for nowait
/// target tasks), copies shareds, initializes privates, and fills in the
/// detach event, affinity records, destructor pointer, and priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the initializer variable used to copy
  // the original value into the task object.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment so the privates record needs no padding
  // beyond what the fields themselves require; stable to keep a
  // deterministic order among equally-aligned fields.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record (extra loop-bound fields), cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; its type is reused for the cast/null below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a compile-time constant (Int) or a runtime value
  // (Pointer) requiring a select.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime count (product
    // of iterator upper bounds); plain clauses contribute a compile-time
    // count of list items.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a plain constant array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-based entries are indexed at runtime; keep the running
      // position in a temporary, starting after the static entries.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that duplicate the task descriptor also need a dup callback
    // to re-run lastprivate/firstprivate initialization per chunk.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4537 
namespace {
/// Dependence kind for RTL.
/// These numeric values are stored directly into the 'flags' field of the
/// kmp_depend_info records emitted below; presumably they mirror the OpenMP
/// runtime's dependence-flag encoding — verify against
/// openmp/runtime/src/kmp.h.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order matches the fields built by getDependTypes(): base address,
/// length in bytes, and dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4548 
4549 /// Translates internal dependency kind into the runtime kind.
4550 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4551   RTLDependenceKindTy DepKind;
4552   switch (K) {
4553   case OMPC_DEPEND_in:
4554     DepKind = DepIn;
4555     break;
4556   // Out and InOut dependencies must use the same code.
4557   case OMPC_DEPEND_out:
4558   case OMPC_DEPEND_inout:
4559     DepKind = DepInOut;
4560     break;
4561   case OMPC_DEPEND_mutexinoutset:
4562     DepKind = DepMutexInOutSet;
4563     break;
4564   case OMPC_DEPEND_source:
4565   case OMPC_DEPEND_sink:
4566   case OMPC_DEPEND_depobj:
4567   case OMPC_DEPEND_unknown:
4568     llvm_unreachable("Unknown task dependence type");
4569   }
4570   return DepKind;
4571 }
4572 
4573 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4574 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4575                            QualType &FlagsTy) {
4576   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4577   if (KmpDependInfoTy.isNull()) {
4578     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4579     KmpDependInfoRD->startDefinition();
4580     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4581     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4582     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4583     KmpDependInfoRD->completeDefinition();
4584     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4585   }
4586 }
4587 
/// Returns the number of dependence elements stored in a depobj together
/// with an lvalue for the first kmp_depend_info element of its array.
/// The element count is read from the base_addr field of the record at
/// index -1 — presumably the slot written when the depobj was created;
/// verify against the depobj-init codegen.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* to the dependence array; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret the void* as kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one whole element to reach the header record before the array.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4616 
/// Emits stores that fill kmp_depend_info records for every expression in
/// \p Data.DepExprs into \p DependenciesArray.
/// \param Pos Either a compile-time index (unsigned*), advanced in place for
/// statically-positioned entries, or a runtime counter lvalue (LValue*) used
/// when entries are produced inside generated iterator loops.
/// \param Data Dependence clause data: the dependence kind, the expressions,
/// and an optional iterator expression wrapping them.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, the stores below are emitted
  // inside the generated iterator loops; a null expression makes this scope
  // a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the compile-time index, or emit an
    // increment-and-store of the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4675 
/// Emit code that computes, for each depobj expression in \p Data, the number
/// of kmp_depend_info records stored in that depobj. Returns one size value
/// per dependency expression; the values are re-loaded after the iterator
/// scope is closed so they remain valid for the caller.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Materialize iterator variables (if any) so the dependency expressions
    // below can reference them; the scope ends before the sizes are re-read.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj handle and reinterpret it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the record immediately preceding the first
      // dependency entry (index -1) — see emitDepobjDependClause, which
      // stores it in that record's base_addr field.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a zero-initialized temporary so the value can be
      // re-loaded after the iterator scope's cleanup runs.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Iterator scope is closed; load the accumulated sizes for the caller.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4733 
/// Copy the kmp_depend_info records of every depobj in \p Data into
/// \p DependenciesArray, starting at the running record index stored in
/// \p PosLVal and advancing that index by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // sizeof(kmp_depend_info) — used below to convert record counts to bytes.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Iterator variables (if any) must be live while the dependency
    // expressions are evaluated.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and reinterpret it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count is kept in the record just before the first entry
      // (index -1), in its base_addr field — see emitDepobjDependClause.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Size in bytes = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps; (record count, not the byte size computed above).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4794 
4795 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4796     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4797     SourceLocation Loc) {
4798   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4799         return D.DepExprs.empty();
4800       }))
4801     return std::make_pair(nullptr, Address::invalid());
4802   // Process list of dependencies.
4803   ASTContext &C = CGM.getContext();
4804   Address DependenciesArray = Address::invalid();
4805   llvm::Value *NumOfElements = nullptr;
4806   unsigned NumDependencies = std::accumulate(
4807       Dependencies.begin(), Dependencies.end(), 0,
4808       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4809         return D.DepKind == OMPC_DEPEND_depobj
4810                    ? V
4811                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4812       });
4813   QualType FlagsTy;
4814   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4815   bool HasDepobjDeps = false;
4816   bool HasRegularWithIterators = false;
4817   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4818   llvm::Value *NumOfRegularWithIterators =
4819       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4820   // Calculate number of depobj dependecies and regular deps with the iterators.
4821   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4822     if (D.DepKind == OMPC_DEPEND_depobj) {
4823       SmallVector<llvm::Value *, 4> Sizes =
4824           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4825       for (llvm::Value *Size : Sizes) {
4826         NumOfDepobjElements =
4827             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4828       }
4829       HasDepobjDeps = true;
4830       continue;
4831     }
4832     // Include number of iterations, if any.
4833     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4834       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4835         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4836         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4837         NumOfRegularWithIterators =
4838             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4839       }
4840       HasRegularWithIterators = true;
4841       continue;
4842     }
4843   }
4844 
4845   QualType KmpDependInfoArrayTy;
4846   if (HasDepobjDeps || HasRegularWithIterators) {
4847     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4848                                            /*isSigned=*/false);
4849     if (HasDepobjDeps) {
4850       NumOfElements =
4851           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4852     }
4853     if (HasRegularWithIterators) {
4854       NumOfElements =
4855           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4856     }
4857     OpaqueValueExpr OVE(Loc,
4858                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4859                         VK_RValue);
4860     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4861                                                   RValue::get(NumOfElements));
4862     KmpDependInfoArrayTy =
4863         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4864                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4865     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4866     // Properly emit variable-sized array.
4867     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4868                                          ImplicitParamDecl::Other);
4869     CGF.EmitVarDecl(*PD);
4870     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4871     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4872                                               /*isSigned=*/false);
4873   } else {
4874     KmpDependInfoArrayTy = C.getConstantArrayType(
4875         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4876         ArrayType::Normal, /*IndexTypeQuals=*/0);
4877     DependenciesArray =
4878         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4879     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4880     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4881                                            /*isSigned=*/false);
4882   }
4883   unsigned Pos = 0;
4884   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4885     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4886         Dependencies[I].IteratorExpr)
4887       continue;
4888     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4889                    DependenciesArray);
4890   }
4891   // Copy regular dependecies with iterators.
4892   LValue PosLVal = CGF.MakeAddrLValue(
4893       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4894   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4895   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4896     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4897         !Dependencies[I].IteratorExpr)
4898       continue;
4899     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4900                    DependenciesArray);
4901   }
4902   // Copy final depobj arrays without iterators.
4903   if (HasDepobjDeps) {
4904     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4905       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4906         continue;
4907       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4908                          DependenciesArray);
4909     }
4910   }
4911   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4912       DependenciesArray, CGF.VoidPtrTy);
4913   return std::make_pair(NumOfElements, DependenciesArray);
4914 }
4915 
/// Emit the heap-allocated dependency array backing an 'omp depobj'
/// construct. Layout: one header kmp_depend_info record (whose base_addr
/// field holds the element count) followed by the dependency records.
/// Returns a void* to the first real record (one past the header).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: the record count is the runtime product of the
    // iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 header record) * sizeof(record).
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant count: size the array (records + header) statically.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    // Runtime position counter, starting just past the header record.
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    // Compile-time position counter, also starting past the header.
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Hand back a pointer to the first real record, not the header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4998 
4999 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5000                                         SourceLocation Loc) {
5001   ASTContext &C = CGM.getContext();
5002   QualType FlagsTy;
5003   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5004   LValue Base = CGF.EmitLoadOfPointerLValue(
5005       DepobjLVal.getAddress(CGF),
5006       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5007   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5008   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5009       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5010   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5011       Addr.getPointer(),
5012       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5013   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5014                                                                CGF.VoidPtrTy);
5015   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5016   // Use default allocator.
5017   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5018   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5019 
5020   // _kmpc_free(gtid, addr, nullptr);
5021   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5022                                 CGM.getModule(), OMPRTL___kmpc_free),
5023                             Args);
5024 }
5025 
/// Implements 'depobj(x) update(kind)': walks every kmp_depend_info record
/// stored in the depobj and overwrites its flags field with the runtime
/// encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): do-while shape — the body runs at least once; assumes
  // NumDeps >= 1, which appears guaranteed by depobj construction — confirm.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI tracks the current record across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop back unless the advanced pointer reached the end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5071 
/// Emit the runtime calls that launch an OpenMP task: allocate/initialize the
/// kmp_task_t, emit its dependency array, then either enqueue the task
/// (__kmpc_omp_task[_with_deps]) or, on the if(false) path, run it
/// immediately between task_begin_if0/task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the task object and its outlined entry point.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // if(true) path: hand the task to the runtime for (possibly deferred)
  // execution, using the *_with_deps entry point when dependences exist.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks, reset the part_id field to 0 before submission.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // if(false) path: wait on dependences, then execute the task body inline,
  // bracketed by __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No if clause: unconditionally take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5189 
/// Emit a taskloop construct: initialize the task object, fill in its loop
/// bound/stride and reduction fields, and call __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    // The runtime expects an int if_val; evaluate and widen the condition.
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task descriptor's lower-bound field from the directive's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No task reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Schedule kind: getInt() distinguishes num_tasks from grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // Schedule value (grainsize/num_tasks), zero-extended to 64 bits.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup callback used for lastprivate/firstprivate copying, if any.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5275 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on every element.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the body entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so that
  // RedOpGen emits the combiner for this element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5355 
5356 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5357 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5358 /// UDR combiner function.
5359 static void emitReductionCombiner(CodeGenFunction &CGF,
5360                                   const Expr *ReductionOp) {
5361   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5362     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5363       if (const auto *DRE =
5364               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5365         if (const auto *DRD =
5366                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5367           std::pair<llvm::Function *, llvm::Function *> Reduction =
5368               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5369           RValue Func = RValue::get(Reduction.first);
5370           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5371           CGF.EmitIgnoredExpr(ReductionOp);
5372           return;
5373         }
5374   CGF.EmitIgnoredExpr(ReductionOp);
5375 }
5376 
/// Emits the reduce_func passed to __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of void* pointing to the reduction items; the
/// function applies each reduction op to the corresponding LHS/RHS pair.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the i-th element of the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. VLA items consume an extra slot in
      // the argument array that holds the number of elements.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiners with the remapped variables in effect.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5468 
5469 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5470                                                   const Expr *ReductionOp,
5471                                                   const Expr *PrivateRef,
5472                                                   const DeclRefExpr *LHS,
5473                                                   const DeclRefExpr *RHS) {
5474   if (PrivateRef->getType()->isArrayType()) {
5475     // Emit reduction for array section.
5476     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5477     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5478     EmitOMPAggregateReduction(
5479         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5480         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5481           emitReductionCombiner(CGF, ReductionOp);
5482         });
5483   } else {
5484     // Emit reduction for array subscript or single variable.
5485     emitReductionCombiner(CGF, ReductionOp);
5486   }
5487 }
5488 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: combine each pair in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The element count is smuggled through the void*
      // slot via inttoptr; the reduction function reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        // The update has the shape "x = x BO e" -- emit it as a single
        // atomic update on x.
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value into a temp
                // remapped over VD and re-evaluate the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5795 
5796 /// Generates unique name for artificial threadprivate variables.
5797 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5798 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5799                                       const Expr *Ref) {
5800   SmallString<256> Buffer;
5801   llvm::raw_svector_ostream Out(Buffer);
5802   const clang::DeclRefExpr *DE;
5803   const VarDecl *D = ::getBaseDecl(Ref, DE);
5804   if (!D)
5805     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5806   D = D->getCanonicalDecl();
5807   std::string Name = CGM.getOpenMPRuntime().getName(
5808       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5809   Out << Prefix << Name << "_"
5810       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5811   return std::string(Out.str());
5812 }
5813 
5814 /// Emits reduction initializer function:
5815 /// \code
5816 /// void @.red_init(void* %arg, void* %orig) {
5817 /// %0 = bitcast void* %arg to <type>*
5818 /// store <type> <init>, <type>* %0
5819 /// ret void
5820 /// }
5821 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg is a void* pointing at the private copy to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No initializer needs the original item: pass a null void*.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5882 
5883 /// Emits reduction combiner function:
5884 /// \code
5885 /// void @.red_comb(void* %arg0, void* %arg1) {
5886 /// %lhs = bitcast void* %arg0 to <type>*
5887 /// %rhs = bitcast void* %arg1 to <type>*
5888 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5889 /// store <type> %2, <type>* %lhs
5890 /// ret void
5891 /// }
5892 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5960 
5961 /// Emits reduction finalizer function:
5962 /// \code
5963 /// void @.red_fini(void* %arg) {
5964 /// %0 = bitcast void* %arg to <type>*
5965 /// <destroy>(<type>* %0)
5966 /// ret void
5967 /// }
5968 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Trivially-destructible items need no finalizer; the runtime accepts a
  // null reduce_fini pointer.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg is a void* pointing at the private copy to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6009 
/// Emits initialization code for task reductions: materializes an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each holding the
/// shared/original addresses, the item size, and the init/fini/comb helper
/// functions), then calls __kmpc_taskred_modifier_init (when a reduction task
/// modifier is present) or __kmpc_taskred_init, and returns the runtime's
/// opaque taskgroup data pointer. Returns nullptr when there is no insert
/// point or no reduction items.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer means the item needs no cleanups.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flags are set to 1 to request delayed creation for VLA/array-section
    // items (see the comment above), 0 otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6138 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  // (The previous comment named __kmpc_taskred_modifier_init; the code below
  // emits the matching *_fini call with three arguments.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6156 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr): store the dynamically computed
  // size so the generated init/fini helpers can load it back (they read the
  // same "<name>.reduction_size" threadprivate).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6173 
6174 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6175                                               SourceLocation Loc,
6176                                               llvm::Value *ReductionsPtr,
6177                                               LValue SharedLVal) {
6178   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6179   // *d);
6180   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6181                                                    CGM.IntTy,
6182                                                    /*isSigned=*/true),
6183                          ReductionsPtr,
6184                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6185                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6186   return Address(
6187       CGF.EmitRuntimeCall(
6188           OMPBuilder.getOrCreateRuntimeFunction(
6189               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6190           Args),
6191       SharedLVal.getAlignment());
6192 }
6193 
6194 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6195                                        SourceLocation Loc) {
6196   if (!CGF.HaveInsertPoint())
6197     return;
6198 
6199   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6200     OMPBuilder.CreateTaskwait(CGF.Builder);
6201   } else {
6202     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6203     // global_tid);
6204     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6205     // Ignore return result until untied tasks are supported.
6206     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6207                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6208                         Args);
6209   }
6210 
6211   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6212     Region->emitUntiedSwitch(CGF);
6213 }
6214 
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the directive body inline in the current function, wrapped in a
  // temporary inlined-region scope so CapturedStmtInfo reflects InnerKind
  // (and its cancel status) for the duration of the emission.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6224 
namespace {
/// Cancellation kind values passed as the cncl_kind argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6234 
6235 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6236   RTCancelKind CancelKind = CancelNoreq;
6237   if (CancelRegion == OMPD_parallel)
6238     CancelKind = CancelParallel;
6239   else if (CancelRegion == OMPD_for)
6240     CancelKind = CancelLoop;
6241   else if (CancelRegion == OMPD_sections)
6242     CancelKind = CancelSections;
6243   else {
6244     assert(CancelRegion == OMPD_taskgroup);
6245     CancelKind = CancelTaskgroup;
6246   }
6247   return CancelKind;
6248 }
6249 
/// Emits a call to __kmpc_cancellationpoint and, when the runtime reports a
/// pending cancellation, branches (through cleanups) to the cancel
/// destination of the enclosing OpenMP region. No-op outside an OpenMP
/// region or without an insert point.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6286 
/// Emits a call to __kmpc_cancel (optionally guarded by an 'if' clause
/// condition) and branches (through cleanups) out of the enclosing OpenMP
/// region when the runtime reports a pending cancellation.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Generator for the then-branch: emit the cancel call and, if it returned
    // non-zero, exit the construct through its cleanup path.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause the cancel call is emitted conditionally; the
      // else branch intentionally emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6329 
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator, allocator traits) pairs to initialize on entry and destroy
  /// on exit.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Initializes each listed allocator before the region body is emitted.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Destroys each listed allocator after the region body is emitted.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6357 
6358 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6359     const OMPExecutableDirective &D, StringRef ParentName,
6360     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6361     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6362   assert(!ParentName.empty() && "Invalid target region parent name!");
6363   HasEmittedTargetRegion = true;
6364   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6365   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6366     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6367       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6368       if (!D.AllocatorTraits)
6369         continue;
6370       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6371     }
6372   }
6373   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6374   CodeGen.setAction(UsesAllocatorAction);
6375   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6376                                    IsOffloadEntry, CodeGen);
6377 }
6378 
/// Emits the initialization of one uses_allocators allocator: calls
/// __kmpc_init_allocator with the default memspace, the trait count and the
/// traits array, then stores the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The traits expression has constant array type; its element count is the
  // number of traits passed to the runtime.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as a void* value for the call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator expression refers to a variable declaration; emit it first
  // so the store below has storage to target.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6413 
6414 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6415                                              const Expr *Allocator) {
6416   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6417   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6418   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6419   llvm::Value *AllocatorVal =
6420       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6421   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6422                                           CGF.getContext().VoidPtrTy,
6423                                           Allocator->getExprLoc());
6424   (void)CGF.EmitRuntimeCall(
6425       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6426                                             OMPRTL___kmpc_destroy_allocator),
6427       {ThreadId, AllocatorVal});
6428 }
6429 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the target region body as a captured-statement function named
  // EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is a uniquely-named dummy constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6496 
6497 /// Checks if the expression is constant or does not have non-trivial function
6498 /// calls.
6499 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6500   // We can skip constant expressions.
6501   // We can skip expressions with trivial calls or simple expressions.
6502   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6503           !E->hasNonTrivialCall(Ctx)) &&
6504          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6505 }
6506 
/// Looks through compound statements and skippable statements/declarations to
/// find the single interesting child of \p Body. Returns nullptr if there is
/// more than one such child or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, at each level keeping at most
  // one non-ignorable statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable when every declaration in it is a non-value
        // declaration or a variable that is constexpr/trivial with a trivial
        // (or absent) initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6551 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the single nested directive (if any) to
    // find a teams construct and its num_teams clause.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams construct without num_teams: let the runtime decide (0).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive found: the number of teams is unknown here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: the num_teams clause (if any) is on
    // this directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives without a teams construct run a single team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6684 
6685 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6686                                   llvm::Value *DefaultThreadLimitVal) {
6687   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6688       CGF.getContext(), CS->getCapturedStmt());
6689   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6690     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6691       llvm::Value *NumThreads = nullptr;
6692       llvm::Value *CondVal = nullptr;
6693       // Handle if clause. If if clause present, the number of threads is
6694       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6695       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6696         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6697         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6698         const OMPIfClause *IfClause = nullptr;
6699         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6700           if (C->getNameModifier() == OMPD_unknown ||
6701               C->getNameModifier() == OMPD_parallel) {
6702             IfClause = C;
6703             break;
6704           }
6705         }
6706         if (IfClause) {
6707           const Expr *Cond = IfClause->getCondition();
6708           bool Result;
6709           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6710             if (!Result)
6711               return CGF.Builder.getInt32(1);
6712           } else {
6713             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6714             if (const auto *PreInit =
6715                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6716               for (const auto *I : PreInit->decls()) {
6717                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6718                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6719                 } else {
6720                   CodeGenFunction::AutoVarEmission Emission =
6721                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6722                   CGF.EmitAutoVarCleanups(Emission);
6723                 }
6724               }
6725             }
6726             CondVal = CGF.EvaluateExprAsBool(Cond);
6727           }
6728         }
6729       }
6730       // Check the value of num_threads clause iff if clause was not specified
6731       // or is not evaluated to false.
6732       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6733         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6734         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6735         const auto *NumThreadsClause =
6736             Dir->getSingleClause<OMPNumThreadsClause>();
6737         CodeGenFunction::LexicalScope Scope(
6738             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6739         if (const auto *PreInit =
6740                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6741           for (const auto *I : PreInit->decls()) {
6742             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6743               CGF.EmitVarDecl(cast<VarDecl>(*I));
6744             } else {
6745               CodeGenFunction::AutoVarEmission Emission =
6746                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6747               CGF.EmitAutoVarCleanups(Emission);
6748             }
6749           }
6750         }
6751         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6752         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6753                                                /*isSigned=*/false);
6754         if (DefaultThreadLimitVal)
6755           NumThreads = CGF.Builder.CreateSelect(
6756               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6757               DefaultThreadLimitVal, NumThreads);
6758       } else {
6759         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6760                                            : CGF.Builder.getInt32(0);
6761       }
6762       // Process condition of the if clause.
6763       if (CondVal) {
6764         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6765                                               CGF.Builder.getInt32(1));
6766       }
6767       return NumThreads;
6768     }
6769     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6770       return CGF.Builder.getInt32(1);
6771     return DefaultThreadLimitVal;
6772   }
6773   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6774                                : CGF.Builder.getInt32(0);
6775 }
6776 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// All clause expressions are emitted on the host; a returned i32 value of 0
/// means "no explicit limit" (see the <cond> ? (<numthreads> ? ... : 0) : 1
/// formula used by getNumThreads).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' may wrap a teams/parallel construct that determines
    // the thread count; inspect the single child directive of the captured
    // statement, if any.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Emit the thread_limit expression of the nested directive, if present.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit helper declarations captured for the thread_limit expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // No-init captures only need storage and cleanups, not an
              // initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a 'teams' directive that is not also 'distribute', descend one
      // level further to find the directive that controls the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain 'distribute' may itself contain a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined constructs: the clauses live on the directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false 'if': one thread, no need to look further.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine as min(num_threads, thread_limit) via unsigned comparison.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A runtime 'if' condition selects between the computed value and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  // Guarded by the isOpenMPTargetExecutionDirective assert above.
  llvm_unreachable("Unsupported directive kind.");
}
7001 
7002 namespace {
7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7004 
7005 // Utility to handle information from clauses associated with a given
7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7007 // It provides a convenient interface to obtain the information and generate
7008 // code for that information.
7009 class MappableExprsHandler {
7010 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these bit values appear to form an ABI with the offloading
  /// runtime's map-type argument — verify against the runtime's definitions
  /// before renumbering.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. See getFlagMemberOffset() for the bit offset of this
    /// field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7051 
7052   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7053   static unsigned getFlagMemberOffset() {
7054     unsigned Offset = 0;
7055     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7056          Remain = Remain >> 1)
7057       Offset++;
7058     return Offset;
7059   }
7060 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the raw base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7077 
  // Parallel arrays describing one set of map entries: base pointers,
  // section pointers/sizes, and the map-type flag word for each entry.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole struct the mapped members belong to.
    Address Base = Address::invalid();
  };
7093 
private:
  /// Information gathered for one component list of a map-like clause
  /// ('map', 'to', 'from', ...): the mappable-expression components together
  /// with the map type, its modifiers, and bookkeeping flags used while
  /// generating the map entries.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must hand back the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True if this map was introduced implicitly rather than by the user.
    bool IsImplicit = false;
    // True if this entry stems from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7148 
  /// Compute the number of bytes to map for expression \p E. Handles array
  /// shaping expressions and array sections (with/without length and lower
  /// bound); references are sized as their pointees; everything else falls
  /// back to the size of the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * dim0 * dim1 * ... using no-unsigned-wrap
      // multiplications; each dimension is converted to size_t first.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero (instead of wrapping) when the lower bound lies past
      // the end of the base.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7223 
7224   /// Return the corresponding bits for a given map clause modifier. Add
7225   /// a flag marking the map as a pointer if requested. Add a flag marking the
7226   /// map as the first one of a series of maps that relate to the same map
7227   /// expression.
7228   OpenMPOffloadMappingFlags getMapTypeBits(
7229       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7230       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7231     OpenMPOffloadMappingFlags Bits =
7232         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7233     switch (MapType) {
7234     case OMPC_MAP_alloc:
7235     case OMPC_MAP_release:
7236       // alloc and release is the default behavior in the runtime library,  i.e.
7237       // if we don't pass any bits alloc/release that is what the runtime is
7238       // going to do. Therefore, we don't need to signal anything for these two
7239       // type modifiers.
7240       break;
7241     case OMPC_MAP_to:
7242       Bits |= OMP_MAP_TO;
7243       break;
7244     case OMPC_MAP_from:
7245       Bits |= OMP_MAP_FROM;
7246       break;
7247     case OMPC_MAP_tofrom:
7248       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7249       break;
7250     case OMPC_MAP_delete:
7251       Bits |= OMP_MAP_DELETE;
7252       break;
7253     case OMPC_MAP_unknown:
7254       llvm_unreachable("Unexpected map type!");
7255     }
7256     if (AddPtrFlag)
7257       Bits |= OMP_MAP_PTR_AND_OBJ;
7258     if (AddIsTargetParamFlag)
7259       Bits |= OMP_MAP_TARGET_PARAM;
7260     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7261         != MapModifiers.end())
7262       Bits |= OMP_MAP_ALWAYS;
7263     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7264         != MapModifiers.end())
7265       Bits |= OMP_MAP_CLOSE;
7266     return Bits;
7267   }
7268 
7269   /// Return true if the provided expression is a final array section. A
7270   /// final array section, is one whose length can't be proved to be one.
7271   bool isFinalArraySectionExpression(const Expr *E) const {
7272     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7273 
7274     // It is not an array section and therefore not a unity-size one.
7275     if (!OASE)
7276       return false;
7277 
7278     // An array section with no colon always refer to a single element.
7279     if (OASE->getColonLocFirst().isInvalid())
7280       return false;
7281 
7282     const Expr *Length = OASE->getLength();
7283 
7284     // If we don't have a length we have to check if the array has size 1
7285     // for this dimension. Also, we should always expect a length if the
7286     // base type is pointer.
7287     if (!Length) {
7288       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7289                              OASE->getBase()->IgnoreParenImpCasts())
7290                              .getCanonicalType();
7291       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7292         return ATy->getSize().getSExtValue() != 1;
7293       // If we don't have a constant dimension length, we have to consider
7294       // the current section as having any size, so it is not necessarily
7295       // unitary. If it happen to be unity size, that's user fault.
7296       return true;
7297     }
7298 
7299     // Check if the length evaluates to 1.
7300     Expr::EvalResult Result;
7301     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7302       return true; // Can have more that size 1.
7303 
7304     llvm::APSInt ConstLength = Result.Val.getInt();
7305     return ConstLength.getSExtValue() != 1;
7306   }
7307 
7308   /// Generate the base pointers, section pointers, sizes and map type
7309   /// bits for the provided map type, map modifier, and expression components.
7310   /// \a IsFirstComponent should be set to true if the provided set of
7311   /// components is the first associated with a capture.
7312   void generateInfoForComponentList(
7313       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7314       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7315       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7316       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7317       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7318       bool IsImplicit, bool ForDeviceAddr = false,
7319       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7320           OverlappedElements = llvm::None) const {
7321     // The following summarizes what has to be generated for each map and the
7322     // types below. The generated information is expressed in this order:
7323     // base pointer, section pointer, size, flags
7324     // (to add to the ones that come from the map type and modifier).
7325     //
7326     // double d;
7327     // int i[100];
7328     // float *p;
7329     //
7330     // struct S1 {
7331     //   int i;
7332     //   float f[50];
7333     // }
7334     // struct S2 {
7335     //   int i;
7336     //   float f[50];
7337     //   S1 s;
7338     //   double *p;
7339     //   struct S2 *ps;
7340     // }
7341     // S2 s;
7342     // S2 *ps;
7343     //
7344     // map(d)
7345     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7346     //
7347     // map(i)
7348     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7349     //
7350     // map(i[1:23])
7351     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7352     //
7353     // map(p)
7354     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7355     //
7356     // map(p[1:24])
7357     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7358     //
7359     // map(s)
7360     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7361     //
7362     // map(s.i)
7363     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7364     //
7365     // map(s.s.f)
7366     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7367     //
7368     // map(s.p)
7369     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7370     //
7371     // map(to: s.p[:22])
7372     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7373     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7374     // &(s.p), &(s.p[0]), 22*sizeof(double),
7375     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7376     // (*) alloc space for struct members, only this is a target parameter
7377     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7378     //      optimizes this entry out, same in the examples below)
7379     // (***) map the pointee (map: to)
7380     //
7381     // map(s.ps)
7382     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7383     //
7384     // map(from: s.ps->s.i)
7385     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7386     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7387     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7388     //
7389     // map(to: s.ps->ps)
7390     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7391     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7392     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7393     //
7394     // map(s.ps->ps->ps)
7395     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7396     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7397     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7398     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7399     //
7400     // map(to: s.ps->ps->s.f[:22])
7401     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7402     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7403     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7404     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7405     //
7406     // map(ps)
7407     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7408     //
7409     // map(ps->i)
7410     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7411     //
7412     // map(ps->s.f)
7413     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7414     //
7415     // map(from: ps->p)
7416     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7417     //
7418     // map(to: ps->p[:22])
7419     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7420     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7421     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7422     //
7423     // map(ps->ps)
7424     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7425     //
7426     // map(from: ps->ps->s.i)
7427     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7428     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7429     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7430     //
7431     // map(from: ps->ps->ps)
7432     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7433     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7434     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7435     //
7436     // map(ps->ps->ps->ps)
7437     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7438     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7439     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7440     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7441     //
7442     // map(to: ps->ps->ps->s.f[:22])
7443     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7444     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7445     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7446     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7447     //
7448     // map(to: s.f[:22]) map(from: s.p[:33])
7449     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7450     //     sizeof(double*) (**), TARGET_PARAM
7451     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7452     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7453     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7454     // (*) allocate contiguous space needed to fit all mapped members even if
7455     //     we allocate space for members not mapped (in this example,
7456     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7457     //     them as well because they fall between &s.f[0] and &s.p)
7458     //
7459     // map(from: s.f[:22]) map(to: ps->p[:33])
7460     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7461     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7462     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7463     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7464     // (*) the struct this entry pertains to is the 2nd element in the list of
7465     //     arguments, hence MEMBER_OF(2)
7466     //
7467     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7468     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7469     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7470     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7471     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7472     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7473     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7474     // (*) the struct this entry pertains to is the 4th element in the list
7475     //     of arguments, hence MEMBER_OF(4)
7476 
7477     // Track if the map information being generated is the first for a capture.
7478     bool IsCaptureFirstInfo = IsFirstComponentList;
7479     // When the variable is on a declare target link or in a to clause with
7480     // unified memory, a reference is needed to hold the host/device address
7481     // of the variable.
7482     bool RequiresReference = false;
7483 
7484     // Scan the components from the base to the complete expression.
7485     auto CI = Components.rbegin();
7486     auto CE = Components.rend();
7487     auto I = CI;
7488 
7489     // Track if the map information being generated is the first for a list of
7490     // components.
7491     bool IsExpressionFirstInfo = true;
7492     Address BP = Address::invalid();
7493     const Expr *AssocExpr = I->getAssociatedExpression();
7494     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7495     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7496     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7497 
7498     if (isa<MemberExpr>(AssocExpr)) {
7499       // The base is the 'this' pointer. The content of the pointer is going
7500       // to be the base of the field being mapped.
7501       BP = CGF.LoadCXXThisAddress();
7502     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7503                (OASE &&
7504                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7505       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7506     } else if (OAShE &&
7507                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7508       BP = Address(
7509           CGF.EmitScalarExpr(OAShE->getBase()),
7510           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7511     } else {
7512       // The base is the reference to the variable.
7513       // BP = &Var.
7514       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7515       if (const auto *VD =
7516               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7517         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7518                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7519           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7520               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7521                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7522             RequiresReference = true;
7523             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7524           }
7525         }
7526       }
7527 
7528       // If the variable is a pointer and is being dereferenced (i.e. is not
7529       // the last component), the base has to be the pointer itself, not its
7530       // reference. References are ignored for mapping purposes.
7531       QualType Ty =
7532           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7533       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7534         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7535 
7536         // We do not need to generate individual map information for the
7537         // pointer, it can be associated with the combined storage.
7538         ++I;
7539       }
7540     }
7541 
7542     // Track whether a component of the list should be marked as MEMBER_OF some
7543     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7544     // in a component list should be marked as MEMBER_OF, all subsequent entries
7545     // do not belong to the base struct. E.g.
7546     // struct S2 s;
7547     // s.ps->ps->ps->f[:]
7548     //   (1) (2) (3) (4)
7549     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7550     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7551     // is the pointee of ps(2) which is not member of struct s, so it should not
7552     // be marked as such (it is still PTR_AND_OBJ).
7553     // The variable is initialized to false so that PTR_AND_OBJ entries which
7554     // are not struct members are not considered (e.g. array of pointers to
7555     // data).
7556     bool ShouldBeMemberOf = false;
7557 
7558     // Variable keeping track of whether or not we have encountered a component
7559     // in the component list which is a member expression. Useful when we have a
7560     // pointer or a final array section, in which case it is the previous
7561     // component in the list which tells us whether we have a member expression.
7562     // E.g. X.f[:]
7563     // While processing the final array section "[:]" it is "f" which tells us
7564     // whether we are dealing with a member of a declared struct.
7565     const MemberExpr *EncounteredME = nullptr;
7566 
7567     for (; I != CE; ++I) {
7568       // If the current component is member of a struct (parent struct) mark it.
7569       if (!EncounteredME) {
7570         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7571         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7572         // as MEMBER_OF the parent struct.
7573         if (EncounteredME)
7574           ShouldBeMemberOf = true;
7575       }
7576 
7577       auto Next = std::next(I);
7578 
7579       // We need to generate the addresses and sizes if this is the last
7580       // component, if the component is a pointer or if it is an array section
7581       // whose length can't be proved to be one. If this is a pointer, it
7582       // becomes the base address for the following components.
7583 
7584       // A final array section, is one whose length can't be proved to be one.
7585       bool IsFinalArraySection =
7586           isFinalArraySectionExpression(I->getAssociatedExpression());
7587 
7588       // Get information on whether the element is a pointer. Have to do a
7589       // special treatment for array sections given that they are built-in
7590       // types.
7591       const auto *OASE =
7592           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7593       const auto *OAShE =
7594           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7595       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7596       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7597       bool IsPointer =
7598           OAShE ||
7599           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7600                        .getCanonicalType()
7601                        ->isAnyPointerType()) ||
7602           I->getAssociatedExpression()->getType()->isAnyPointerType();
7603       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7604 
7605       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7606         // If this is not the last component, we expect the pointer to be
7607         // associated with an array expression or member expression.
7608         assert((Next == CE ||
7609                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7610                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7611                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7612                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7613                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7614                "Unexpected expression");
7615 
7616         Address LB = Address::invalid();
7617         if (OAShE) {
7618           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7619                        CGF.getContext().getTypeAlignInChars(
7620                            OAShE->getBase()->getType()));
7621         } else {
7622           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7623                    .getAddress(CGF);
7624         }
7625 
7626         // If this component is a pointer inside the base struct then we don't
7627         // need to create any entry for it - it will be combined with the object
7628         // it is pointing to into a single PTR_AND_OBJ entry.
7629         bool IsMemberPointerOrAddr =
7630             (IsPointer || ForDeviceAddr) && EncounteredME &&
7631             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7632              EncounteredME);
7633         if (!OverlappedElements.empty()) {
7634           // Handle base element with the info for overlapped elements.
7635           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7636           assert(Next == CE &&
7637                  "Expected last element for the overlapped elements.");
7638           assert(!IsPointer &&
7639                  "Unexpected base element with the pointer type.");
7640           // Mark the whole struct as the struct that requires allocation on the
7641           // device.
7642           PartialStruct.LowestElem = {0, LB};
7643           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7644               I->getAssociatedExpression()->getType());
7645           Address HB = CGF.Builder.CreateConstGEP(
7646               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7647                                                               CGF.VoidPtrTy),
7648               TypeSize.getQuantity() - 1);
7649           PartialStruct.HighestElem = {
7650               std::numeric_limits<decltype(
7651                   PartialStruct.HighestElem.first)>::max(),
7652               HB};
7653           PartialStruct.Base = BP;
7654           // Emit data for non-overlapped data.
7655           OpenMPOffloadMappingFlags Flags =
7656               OMP_MAP_MEMBER_OF |
7657               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7658                              /*AddPtrFlag=*/false,
7659                              /*AddIsTargetParamFlag=*/false);
7660           LB = BP;
7661           llvm::Value *Size = nullptr;
7662           // Do bitcopy of all non-overlapped structure elements.
7663           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7664                    Component : OverlappedElements) {
7665             Address ComponentLB = Address::invalid();
7666             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7667                  Component) {
7668               if (MC.getAssociatedDeclaration()) {
7669                 ComponentLB =
7670                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7671                         .getAddress(CGF);
7672                 Size = CGF.Builder.CreatePtrDiff(
7673                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7674                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7675                 break;
7676               }
7677             }
7678             BasePointers.push_back(BP.getPointer());
7679             Pointers.push_back(LB.getPointer());
7680             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7681                                                       /*isSigned=*/true));
7682             Types.push_back(Flags);
7683             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7684           }
7685           BasePointers.push_back(BP.getPointer());
7686           Pointers.push_back(LB.getPointer());
7687           Size = CGF.Builder.CreatePtrDiff(
7688               CGF.EmitCastToVoidPtr(
7689                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7690               CGF.EmitCastToVoidPtr(LB.getPointer()));
7691           Sizes.push_back(
7692               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7693           Types.push_back(Flags);
7694           break;
7695         }
7696         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7697         if (!IsMemberPointerOrAddr) {
7698           BasePointers.push_back(BP.getPointer());
7699           Pointers.push_back(LB.getPointer());
7700           Sizes.push_back(
7701               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7702 
7703           // We need to add a pointer flag for each map that comes from the
7704           // same expression except for the first one. We also need to signal
7705           // this map is the first one that relates with the current capture
7706           // (there is a set of entries for each capture).
7707           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7708               MapType, MapModifiers, IsImplicit,
7709               !IsExpressionFirstInfo || RequiresReference,
7710               IsCaptureFirstInfo && !RequiresReference);
7711 
7712           if (!IsExpressionFirstInfo) {
7713             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7714             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7715             if (IsPointer)
7716               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7717                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7718 
7719             if (ShouldBeMemberOf) {
7720               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7721               // should be later updated with the correct value of MEMBER_OF.
7722               Flags |= OMP_MAP_MEMBER_OF;
7723               // From now on, all subsequent PTR_AND_OBJ entries should not be
7724               // marked as MEMBER_OF.
7725               ShouldBeMemberOf = false;
7726             }
7727           }
7728 
7729           Types.push_back(Flags);
7730         }
7731 
7732         // If we have encountered a member expression so far, keep track of the
7733         // mapped member. If the parent is "*this", then the value declaration
7734         // is nullptr.
7735         if (EncounteredME) {
7736           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7737           unsigned FieldIndex = FD->getFieldIndex();
7738 
7739           // Update info about the lowest and highest elements for this struct
7740           if (!PartialStruct.Base.isValid()) {
7741             PartialStruct.LowestElem = {FieldIndex, LB};
7742             if (IsFinalArraySection) {
7743               Address HB =
7744                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7745                       .getAddress(CGF);
7746               PartialStruct.HighestElem = {FieldIndex, HB};
7747             } else {
7748               PartialStruct.HighestElem = {FieldIndex, LB};
7749             }
7750             PartialStruct.Base = BP;
7751           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7752             PartialStruct.LowestElem = {FieldIndex, LB};
7753           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7754             PartialStruct.HighestElem = {FieldIndex, LB};
7755           }
7756         }
7757 
7758         // If we have a final array section, we are done with this expression.
7759         if (IsFinalArraySection)
7760           break;
7761 
7762         // The pointer becomes the base for the next element.
7763         if (Next != CE)
7764           BP = LB;
7765 
7766         IsExpressionFirstInfo = false;
7767         IsCaptureFirstInfo = false;
7768       }
7769     }
7770   }
7771 
7772   /// Return the adjusted map modifiers if the declaration a capture refers to
7773   /// appears in a first-private clause. This is expected to be used only with
7774   /// directives that start with 'target'.
7775   MappableExprsHandler::OpenMPOffloadMappingFlags
7776   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7777     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7778 
7779     // A first private variable captured by reference will use only the
7780     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7781     // declaration is known as first-private in this handler.
7782     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7783       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7784           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7785         return MappableExprsHandler::OMP_MAP_ALWAYS |
7786                MappableExprsHandler::OMP_MAP_TO;
7787       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7788         return MappableExprsHandler::OMP_MAP_TO |
7789                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7790       return MappableExprsHandler::OMP_MAP_PRIVATE |
7791              MappableExprsHandler::OMP_MAP_TO;
7792     }
7793     return MappableExprsHandler::OMP_MAP_TO |
7794            MappableExprsHandler::OMP_MAP_FROM;
7795   }
7796 
7797   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7798     // Rotate by getFlagMemberOffset() bits.
7799     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7800                                                   << getFlagMemberOffset());
7801   }
7802 
7803   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7804                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7805     // If the entry is PTR_AND_OBJ but has not been marked with the special
7806     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7807     // marked as MEMBER_OF.
7808     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7809         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7810       return;
7811 
7812     // Reset the placeholder value to prepare the flag for the assignment of the
7813     // proper MEMBER_OF value.
7814     Flags &= ~OMP_MAP_MEMBER_OF;
7815     Flags |= MemberOfFlag;
7816   }
7817 
7818   void getPlainLayout(const CXXRecordDecl *RD,
7819                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7820                       bool AsBase) const {
7821     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7822 
7823     llvm::StructType *St =
7824         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7825 
7826     unsigned NumElements = St->getNumElements();
7827     llvm::SmallVector<
7828         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7829         RecordLayout(NumElements);
7830 
7831     // Fill bases.
7832     for (const auto &I : RD->bases()) {
7833       if (I.isVirtual())
7834         continue;
7835       const auto *Base = I.getType()->getAsCXXRecordDecl();
7836       // Ignore empty bases.
7837       if (Base->isEmpty() || CGF.getContext()
7838                                  .getASTRecordLayout(Base)
7839                                  .getNonVirtualSize()
7840                                  .isZero())
7841         continue;
7842 
7843       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7844       RecordLayout[FieldIndex] = Base;
7845     }
7846     // Fill in virtual bases.
7847     for (const auto &I : RD->vbases()) {
7848       const auto *Base = I.getType()->getAsCXXRecordDecl();
7849       // Ignore empty bases.
7850       if (Base->isEmpty())
7851         continue;
7852       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7853       if (RecordLayout[FieldIndex])
7854         continue;
7855       RecordLayout[FieldIndex] = Base;
7856     }
7857     // Fill in all the fields.
7858     assert(!RD->isUnion() && "Unexpected union.");
7859     for (const auto *Field : RD->fields()) {
7860       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7861       // will fill in later.)
7862       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7863         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7864         RecordLayout[FieldIndex] = Field;
7865       }
7866     }
7867     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7868              &Data : RecordLayout) {
7869       if (Data.isNull())
7870         continue;
7871       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7872         getPlainLayout(Base, Layout, /*AsBase=*/true);
7873       else
7874         Layout.push_back(Data.get<const FieldDecl *>());
7875     }
7876   }
7877 
7878 public:
7879   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7880       : CurDir(&Dir), CGF(CGF) {
7881     // Extract firstprivate clause information.
7882     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7883       for (const auto *D : C->varlists())
7884         FirstPrivateDecls.try_emplace(
7885             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7886     // Extract implicit firstprivates from uses_allocators clauses.
7887     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7888       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7889         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7890         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7891           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7892                                         /*Implicit=*/true);
7893         else if (const auto *VD = dyn_cast<VarDecl>(
7894                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7895                          ->getDecl()))
7896           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7897       }
7898     }
7899     // Extract device pointer clause information.
7900     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7901       for (auto L : C->component_lists())
7902         DevPointersMap[L.first].push_back(L.second);
7903   }
7904 
  /// Constructor for the declare mapper directive. Only records the directive
  /// and the CodeGenFunction; unlike the executable-directive constructor, no
  /// clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7908 
7909   /// Generate code for the combined entry if we have a partially mapped struct
7910   /// and take care of the mapping flags of the arguments corresponding to
7911   /// individual struct members.
7912   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7913                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7914                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7915                          const StructRangeInfoTy &PartialStruct) const {
7916     // Base is the base of the struct
7917     BasePointers.push_back(PartialStruct.Base.getPointer());
7918     // Pointer is the address of the lowest element
7919     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7920     Pointers.push_back(LB);
7921     // Size is (addr of {highest+1} element) - (addr of lowest element)
7922     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7923     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7924     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7925     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7926     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7927     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7928                                                   /*isSigned=*/false);
7929     Sizes.push_back(Size);
7930     // Map type is always TARGET_PARAM
7931     Types.push_back(OMP_MAP_TARGET_PARAM);
7932     // Remove TARGET_PARAM flag from the first element
7933     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7934 
7935     // All other current entries will be MEMBER_OF the combined entry
7936     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7937     // 0xFFFF in the MEMBER_OF field).
7938     OpenMPOffloadMappingFlags MemberOfFlag =
7939         getMemberOfFlag(BasePointers.size() - 1);
7940     for (auto &M : CurTypes)
7941       setCorrectMemberOfFlag(M, MemberOfFlag);
7942   }
7943 
7944   /// Generate all the base pointers, section pointers, sizes and map
7945   /// types for the extracted mappable expressions. Also, for each item that
7946   /// relates with a device pointer, a pair of the relevant declaration and
7947   /// index where it occurs is appended to the device pointers info array.
7948   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7949                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7950                        MapFlagsArrayTy &Types) const {
7951     // We have to process the component lists that relate with the same
7952     // declaration in a single chunk so that we can generate the map flags
7953     // correctly. Therefore, we organize all lists in a map.
7954     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7955 
7956     // Helper function to fill the information map for the different supported
7957     // clauses.
7958     auto &&InfoGen =
7959         [&Info](const ValueDecl *D,
7960                 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7961                 OpenMPMapClauseKind MapType,
7962                 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7963                 bool ReturnDevicePointer, bool IsImplicit,
7964                 bool ForDeviceAddr = false) {
7965           const ValueDecl *VD =
7966               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7967           Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7968                                 IsImplicit, ForDeviceAddr);
7969         };
7970 
7971     assert(CurDir.is<const OMPExecutableDirective *>() &&
7972            "Expect a executable directive");
7973     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7974     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7975       for (const auto L : C->component_lists()) {
7976         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7977             /*ReturnDevicePointer=*/false, C->isImplicit());
7978       }
7979     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7980       for (const auto L : C->component_lists()) {
7981         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7982             /*ReturnDevicePointer=*/false, C->isImplicit());
7983       }
7984     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7985       for (const auto L : C->component_lists()) {
7986         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7987             /*ReturnDevicePointer=*/false, C->isImplicit());
7988       }
7989 
7990     // Look at the use_device_ptr clause information and mark the existing map
7991     // entries as such. If there is no map information for an entry in the
7992     // use_device_ptr list, we create one with map type 'alloc' and zero size
7993     // section. It is the user fault if that was not mapped before. If there is
7994     // no map information and the pointer is a struct member, then we defer the
7995     // emission of that entry until the whole struct has been processed.
7996     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7997         DeferredInfo;
7998 
7999     for (const auto *C :
8000          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8001       for (const auto L : C->component_lists()) {
8002         assert(!L.second.empty() && "Not expecting empty list of components!");
8003         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8004         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8005         const Expr *IE = L.second.back().getAssociatedExpression();
8006         // If the first component is a member expression, we have to look into
8007         // 'this', which maps to null in the map of map information. Otherwise
8008         // look directly for the information.
8009         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8010 
8011         // We potentially have map information for this declaration already.
8012         // Look for the first set of components that refer to it.
8013         if (It != Info.end()) {
8014           auto CI = std::find_if(
8015               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8016                 return MI.Components.back().getAssociatedDeclaration() == VD;
8017               });
8018           // If we found a map entry, signal that the pointer has to be returned
8019           // and move on to the next declaration.
8020           if (CI != It->second.end()) {
8021             CI->ReturnDevicePointer = true;
8022             continue;
8023           }
8024         }
8025 
8026         // We didn't find any match in our map information - generate a zero
8027         // size array section - if the pointer is a struct member we defer this
8028         // action until the whole struct has been processed.
8029         if (isa<MemberExpr>(IE)) {
8030           // Insert the pointer into Info to be processed by
8031           // generateInfoForComponentList. Because it is a member pointer
8032           // without a pointee, no entry will be generated for it, therefore
8033           // we need to generate one after the whole struct has been processed.
8034           // Nonetheless, generateInfoForComponentList must be called to take
8035           // the pointer into account for the calculation of the range of the
8036           // partial struct.
8037           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8038                   /*ReturnDevicePointer=*/false, C->isImplicit());
8039           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8040         } else {
8041           llvm::Value *Ptr =
8042               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8043           BasePointers.emplace_back(Ptr, VD);
8044           Pointers.push_back(Ptr);
8045           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8046           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8047         }
8048       }
8049     }
8050 
8051     // Look at the use_device_addr clause information and mark the existing map
8052     // entries as such. If there is no map information for an entry in the
8053     // use_device_addr list, we create one with map type 'alloc' and zero size
8054     // section. It is the user fault if that was not mapped before. If there is
8055     // no map information and the pointer is a struct member, then we defer the
8056     // emission of that entry until the whole struct has been processed.
8057     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8058     for (const auto *C :
8059          CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8060       for (const auto L : C->component_lists()) {
8061         assert(!L.second.empty() && "Not expecting empty list of components!");
8062         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8063         if (!Processed.insert(VD).second)
8064           continue;
8065         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8066         const Expr *IE = L.second.back().getAssociatedExpression();
8067         // If the first component is a member expression, we have to look into
8068         // 'this', which maps to null in the map of map information. Otherwise
8069         // look directly for the information.
8070         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8071 
8072         // We potentially have map information for this declaration already.
8073         // Look for the first set of components that refer to it.
8074         if (It != Info.end()) {
8075           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8076             return MI.Components.back().getAssociatedDeclaration() == VD;
8077           });
8078           // If we found a map entry, signal that the pointer has to be returned
8079           // and move on to the next declaration.
8080           if (CI != It->second.end()) {
8081             CI->ReturnDevicePointer = true;
8082             continue;
8083           }
8084         }
8085 
8086         // We didn't find any match in our map information - generate a zero
8087         // size array section - if the pointer is a struct member we defer this
8088         // action until the whole struct has been processed.
8089         if (isa<MemberExpr>(IE)) {
8090           // Insert the pointer into Info to be processed by
8091           // generateInfoForComponentList. Because it is a member pointer
8092           // without a pointee, no entry will be generated for it, therefore
8093           // we need to generate one after the whole struct has been processed.
8094           // Nonetheless, generateInfoForComponentList must be called to take
8095           // the pointer into account for the calculation of the range of the
8096           // partial struct.
8097           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8098                   /*ReturnDevicePointer=*/false, C->isImplicit(),
8099                   /*ForDeviceAddr=*/true);
8100           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8101         } else {
8102           llvm::Value *Ptr;
8103           if (IE->isGLValue())
8104             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8105           else
8106             Ptr = CGF.EmitScalarExpr(IE);
8107           BasePointers.emplace_back(Ptr, VD);
8108           Pointers.push_back(Ptr);
8109           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8110           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8111         }
8112       }
8113     }
8114 
8115     for (const auto &M : Info) {
8116       // We need to know when we generate information for the first component
8117       // associated with a capture, because the mapping flags depend on it.
8118       bool IsFirstComponentList = true;
8119 
8120       // Temporary versions of arrays
8121       MapBaseValuesArrayTy CurBasePointers;
8122       MapValuesArrayTy CurPointers;
8123       MapValuesArrayTy CurSizes;
8124       MapFlagsArrayTy CurTypes;
8125       StructRangeInfoTy PartialStruct;
8126 
8127       for (const MapInfo &L : M.second) {
8128         assert(!L.Components.empty() &&
8129                "Not expecting declaration with no component lists.");
8130 
8131         // Remember the current base pointer index.
8132         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8133         generateInfoForComponentList(
8134             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8135             CurPointers, CurSizes, CurTypes, PartialStruct,
8136             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8137 
8138         // If this entry relates with a device pointer, set the relevant
8139         // declaration and add the 'return pointer' flag.
8140         if (L.ReturnDevicePointer) {
8141           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8142                  "Unexpected number of mapped base pointers.");
8143 
8144           const ValueDecl *RelevantVD =
8145               L.Components.back().getAssociatedDeclaration();
8146           assert(RelevantVD &&
8147                  "No relevant declaration related with device pointer??");
8148 
8149           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8150           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8151         }
8152         IsFirstComponentList = false;
8153       }
8154 
8155       // Append any pending zero-length pointers which are struct members and
8156       // used with use_device_ptr or use_device_addr.
8157       auto CI = DeferredInfo.find(M.first);
8158       if (CI != DeferredInfo.end()) {
8159         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8160           llvm::Value *BasePtr;
8161           llvm::Value *Ptr;
8162           if (L.ForDeviceAddr) {
8163             if (L.IE->isGLValue())
8164               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8165             else
8166               Ptr = this->CGF.EmitScalarExpr(L.IE);
8167             BasePtr = Ptr;
8168             // Entry is RETURN_PARAM. Also, set the placeholder value
8169             // MEMBER_OF=FFFF so that the entry is later updated with the
8170             // correct value of MEMBER_OF.
8171             CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8172           } else {
8173             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8174             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8175                                              L.IE->getExprLoc());
8176             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8177             // value MEMBER_OF=FFFF so that the entry is later updated with the
8178             // correct value of MEMBER_OF.
8179             CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8180                                OMP_MAP_MEMBER_OF);
8181           }
8182           CurBasePointers.emplace_back(BasePtr, L.VD);
8183           CurPointers.push_back(Ptr);
8184           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8185         }
8186       }
8187 
8188       // If there is an entry in PartialStruct it means we have a struct with
8189       // individual members mapped. Emit an extra combined entry.
8190       if (PartialStruct.Base.isValid())
8191         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8192                           PartialStruct);
8193 
8194       // We need to append the results of this capture to what we already have.
8195       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8196       Pointers.append(CurPointers.begin(), CurPointers.end());
8197       Sizes.append(CurSizes.begin(), CurSizes.end());
8198       Types.append(CurTypes.begin(), CurTypes.end());
8199     }
8200   }
8201 
8202   /// Generate all the base pointers, section pointers, sizes and map types for
8203   /// the extracted map clauses of user-defined mapper.
8204   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8205                                 MapValuesArrayTy &Pointers,
8206                                 MapValuesArrayTy &Sizes,
8207                                 MapFlagsArrayTy &Types) const {
8208     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8209            "Expect a declare mapper directive");
8210     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8211     // We have to process the component lists that relate with the same
8212     // declaration in a single chunk so that we can generate the map flags
8213     // correctly. Therefore, we organize all lists in a map.
8214     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8215 
8216     // Helper function to fill the information map for the different supported
8217     // clauses.
8218     auto &&InfoGen = [&Info](
8219         const ValueDecl *D,
8220         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8221         OpenMPMapClauseKind MapType,
8222         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8223         bool ReturnDevicePointer, bool IsImplicit) {
8224       const ValueDecl *VD =
8225           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8226       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8227                             IsImplicit);
8228     };
8229 
8230     for (const auto *C : CurMapperDir->clauselists()) {
8231       const auto *MC = cast<OMPMapClause>(C);
8232       for (const auto L : MC->component_lists()) {
8233         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8234                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8235       }
8236     }
8237 
8238     for (const auto &M : Info) {
8239       // We need to know when we generate information for the first component
8240       // associated with a capture, because the mapping flags depend on it.
8241       bool IsFirstComponentList = true;
8242 
8243       // Temporary versions of arrays
8244       MapBaseValuesArrayTy CurBasePointers;
8245       MapValuesArrayTy CurPointers;
8246       MapValuesArrayTy CurSizes;
8247       MapFlagsArrayTy CurTypes;
8248       StructRangeInfoTy PartialStruct;
8249 
8250       for (const MapInfo &L : M.second) {
8251         assert(!L.Components.empty() &&
8252                "Not expecting declaration with no component lists.");
8253         generateInfoForComponentList(
8254             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8255             CurPointers, CurSizes, CurTypes, PartialStruct,
8256             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8257         IsFirstComponentList = false;
8258       }
8259 
8260       // If there is an entry in PartialStruct it means we have a struct with
8261       // individual members mapped. Emit an extra combined entry.
8262       if (PartialStruct.Base.isValid())
8263         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8264                           PartialStruct);
8265 
8266       // We need to append the results of this capture to what we already have.
8267       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8268       Pointers.append(CurPointers.begin(), CurPointers.end());
8269       Sizes.append(CurSizes.begin(), CurSizes.end());
8270       Types.append(CurTypes.begin(), CurTypes.end());
8271     }
8272   }
8273 
8274   /// Emit capture info for lambdas for variables captured by reference.
8275   void generateInfoForLambdaCaptures(
8276       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8277       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8278       MapFlagsArrayTy &Types,
8279       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8280     const auto *RD = VD->getType()
8281                          .getCanonicalType()
8282                          .getNonReferenceType()
8283                          ->getAsCXXRecordDecl();
8284     if (!RD || !RD->isLambda())
8285       return;
8286     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8287     LValue VDLVal = CGF.MakeAddrLValue(
8288         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8289     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8290     FieldDecl *ThisCapture = nullptr;
8291     RD->getCaptureFields(Captures, ThisCapture);
8292     if (ThisCapture) {
8293       LValue ThisLVal =
8294           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8295       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8296       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8297                                  VDLVal.getPointer(CGF));
8298       BasePointers.push_back(ThisLVal.getPointer(CGF));
8299       Pointers.push_back(ThisLValVal.getPointer(CGF));
8300       Sizes.push_back(
8301           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8302                                     CGF.Int64Ty, /*isSigned=*/true));
8303       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8304                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8305     }
8306     for (const LambdaCapture &LC : RD->captures()) {
8307       if (!LC.capturesVariable())
8308         continue;
8309       const VarDecl *VD = LC.getCapturedVar();
8310       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8311         continue;
8312       auto It = Captures.find(VD);
8313       assert(It != Captures.end() && "Found lambda capture without field.");
8314       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8315       if (LC.getCaptureKind() == LCK_ByRef) {
8316         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8317         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8318                                    VDLVal.getPointer(CGF));
8319         BasePointers.push_back(VarLVal.getPointer(CGF));
8320         Pointers.push_back(VarLValVal.getPointer(CGF));
8321         Sizes.push_back(CGF.Builder.CreateIntCast(
8322             CGF.getTypeSize(
8323                 VD->getType().getCanonicalType().getNonReferenceType()),
8324             CGF.Int64Ty, /*isSigned=*/true));
8325       } else {
8326         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8327         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8328                                    VDLVal.getPointer(CGF));
8329         BasePointers.push_back(VarLVal.getPointer(CGF));
8330         Pointers.push_back(VarRVal.getScalarVal());
8331         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8332       }
8333       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8334                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8335     }
8336   }
8337 
8338   /// Set correct indices for lambdas captures.
8339   void adjustMemberOfForLambdaCaptures(
8340       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8341       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8342       MapFlagsArrayTy &Types) const {
8343     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8344       // Set correct member_of idx for all implicit lambda captures.
8345       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8346                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8347         continue;
8348       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8349       assert(BasePtr && "Unable to find base lambda address.");
8350       int TgtIdx = -1;
8351       for (unsigned J = I; J > 0; --J) {
8352         unsigned Idx = J - 1;
8353         if (Pointers[Idx] != BasePtr)
8354           continue;
8355         TgtIdx = Idx;
8356         break;
8357       }
8358       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8359       // All other current entries will be MEMBER_OF the combined entry
8360       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8361       // 0xFFFF in the MEMBER_OF field).
8362       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8363       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8364     }
8365   }
8366 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \param Cap the capture ('this' or a captured variable) to generate map
  ///        information for.
  /// \param Arg the corresponding argument value of the outlined function.
  /// The generated entries are appended to \p BasePointers, \p Pointers,
  /// \p Sizes and \p Types. \p PartialStruct is filled in when individual
  /// members of a struct are mapped.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Use the canonical declaration of the captured variable as the key; a
    // 'this' capture is represented by a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      // Device pointers are passed with the size of 'void *'.
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // A MapData tuple bundles one component list of a map clause together
    // with that clause's map type, map-type modifiers and implicit flag.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list from the directive's map clauses that
    // refers to this capture's declaration.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Walking both lists from the base (i.e. in reverse), two lists overlap
    // when one is exhausted while still matching the other: the exhausted one
    // maps the enclosing object and the other maps a sub-object of it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare against every later list only, so each pair is visited once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE(review): this reuses the outer loop's MapType/MapModifiers/
        // IsImplicit variables as scratch for the tuple unpacking, clobbering
        // the outer values. They are re-extracted on the next outer iteration
        // and unused after this inner loop, so this is harmless as written,
        // but separate locals would be clearer.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Advance through both lists (base first) while they keep matching.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted (shorter) list is the base; the longer list
          // describes the overlapped sub-object and is recorded against it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Precompute the field layout of the record so overlapped component
      // lists can be ordered by field position below.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix (walking from the base) of both lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise compare the first differing fields: within the same
            // record, by field index; across records, by which of the two
            // fields appears first in the precomputed layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // These base entries are emitted first, each with its sorted list of
    // overlapped components passed along.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // Only the very first emitted list (none, if overlapped entries were
    // already emitted above) is flagged as the first component list.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8544 
8545   /// Generate the base pointers, section pointers, sizes and map types
8546   /// associated with the declare target link variables.
8547   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8548                                         MapValuesArrayTy &Pointers,
8549                                         MapValuesArrayTy &Sizes,
8550                                         MapFlagsArrayTy &Types) const {
8551     assert(CurDir.is<const OMPExecutableDirective *>() &&
8552            "Expect a executable directive");
8553     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8554     // Map other list items in the map clause which are not captured variables
8555     // but "declare target link" global variables.
8556     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8557       for (const auto L : C->component_lists()) {
8558         if (!L.first)
8559           continue;
8560         const auto *VD = dyn_cast<VarDecl>(L.first);
8561         if (!VD)
8562           continue;
8563         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8564             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8565         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8566             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8567           continue;
8568         StructRangeInfoTy PartialStruct;
8569         generateInfoForComponentList(
8570             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8571             Pointers, Sizes, Types, PartialStruct,
8572             /*IsFirstComponentList=*/true, C->isImplicit());
8573         assert(!PartialStruct.Base.isValid() &&
8574                "No partial structs for declare target link expected.");
8575       }
8576     }
8577   }
8578 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of \a CurBasePointers, \a CurPointers,
  /// \a CurSizes and \a CurMapTypes, chosen according to how the value is
  /// captured (this-pointer, by-copy, or by-reference). The produced entry is
  /// always flagged as a target parameter, and as implicit unless the variable
  /// is recorded in FirstPrivateDecls as non-implicit.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Assume the map is implicit unless FirstPrivateDecls says otherwise.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointee object to/from the device. The field
      // type is a pointer, so the mapped size is that of the pointee.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Capture by reference: the field is a ReferenceType; map the referenced
      // element.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variable: emit a global copy once and map that
        // copy instead of the original storage.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: load the pointer value through the reference
          // so the pointee address (not the reference slot) is mapped.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8662 };
8663 } // anonymous namespace
8664 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates \p Info with stack (or, where possible, constant global) arrays
/// holding the base pointers, pointers, sizes and map types collected in the
/// input arrays, and emits the stores that fill the runtime-initialized ones.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers always need runtime stores, so they live in
    // stack temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the runtime-initialized arrays element by element.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Store through a pointer of the value's own type to avoid an explicit
      // value cast.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr capture's address slot lives, if asked.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only the stack-allocated sizes array needs runtime stores.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8783 
8784 /// Emit the arguments to be passed to the runtime library based on the
8785 /// arrays of pointers, sizes and map types.
8786 static void emitOffloadingArraysArgument(
8787     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8788     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8789     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8790   CodeGenModule &CGM = CGF.CGM;
8791   if (Info.NumberOfPtrs) {
8792     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8793         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8794         Info.BasePointersArray,
8795         /*Idx0=*/0, /*Idx1=*/0);
8796     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8797         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8798         Info.PointersArray,
8799         /*Idx0=*/0,
8800         /*Idx1=*/0);
8801     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8802         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8803         /*Idx0=*/0, /*Idx1=*/0);
8804     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8805         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8806         Info.MapTypesArray,
8807         /*Idx0=*/0,
8808         /*Idx1=*/0);
8809   } else {
8810     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8811     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8812     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8813     MapTypesArrayArg =
8814         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8815   }
8816 }
8817 
8818 /// Check for inner distribute directive.
8819 static const OMPExecutableDirective *
8820 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8821   const auto *CS = D.getInnermostCapturedStmt();
8822   const auto *Body =
8823       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8824   const Stmt *ChildStmt =
8825       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8826 
8827   if (const auto *NestedDir =
8828           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8829     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8830     switch (D.getDirectiveKind()) {
8831     case OMPD_target:
8832       if (isOpenMPDistributeDirective(DKind))
8833         return NestedDir;
8834       if (DKind == OMPD_teams) {
8835         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8836             /*IgnoreCaptured=*/true);
8837         if (!Body)
8838           return nullptr;
8839         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8840         if (const auto *NND =
8841                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8842           DKind = NND->getDirectiveKind();
8843           if (isOpenMPDistributeDirective(DKind))
8844             return NND;
8845         }
8846       }
8847       return nullptr;
8848     case OMPD_target_teams:
8849       if (isOpenMPDistributeDirective(DKind))
8850         return NestedDir;
8851       return nullptr;
8852     case OMPD_target_parallel:
8853     case OMPD_target_simd:
8854     case OMPD_target_parallel_for:
8855     case OMPD_target_parallel_for_simd:
8856       return nullptr;
8857     case OMPD_target_teams_distribute:
8858     case OMPD_target_teams_distribute_simd:
8859     case OMPD_target_teams_distribute_parallel_for:
8860     case OMPD_target_teams_distribute_parallel_for_simd:
8861     case OMPD_parallel:
8862     case OMPD_for:
8863     case OMPD_parallel_for:
8864     case OMPD_parallel_master:
8865     case OMPD_parallel_sections:
8866     case OMPD_for_simd:
8867     case OMPD_parallel_for_simd:
8868     case OMPD_cancel:
8869     case OMPD_cancellation_point:
8870     case OMPD_ordered:
8871     case OMPD_threadprivate:
8872     case OMPD_allocate:
8873     case OMPD_task:
8874     case OMPD_simd:
8875     case OMPD_sections:
8876     case OMPD_section:
8877     case OMPD_single:
8878     case OMPD_master:
8879     case OMPD_critical:
8880     case OMPD_taskyield:
8881     case OMPD_barrier:
8882     case OMPD_taskwait:
8883     case OMPD_taskgroup:
8884     case OMPD_atomic:
8885     case OMPD_flush:
8886     case OMPD_depobj:
8887     case OMPD_scan:
8888     case OMPD_teams:
8889     case OMPD_target_data:
8890     case OMPD_target_exit_data:
8891     case OMPD_target_enter_data:
8892     case OMPD_distribute:
8893     case OMPD_distribute_simd:
8894     case OMPD_distribute_parallel_for:
8895     case OMPD_distribute_parallel_for_simd:
8896     case OMPD_teams_distribute:
8897     case OMPD_teams_distribute_simd:
8898     case OMPD_teams_distribute_parallel_for:
8899     case OMPD_teams_distribute_parallel_for_simd:
8900     case OMPD_target_update:
8901     case OMPD_declare_simd:
8902     case OMPD_declare_variant:
8903     case OMPD_begin_declare_variant:
8904     case OMPD_end_declare_variant:
8905     case OMPD_declare_target:
8906     case OMPD_end_declare_target:
8907     case OMPD_declare_reduction:
8908     case OMPD_declare_mapper:
8909     case OMPD_taskloop:
8910     case OMPD_taskloop_simd:
8911     case OMPD_master_taskloop:
8912     case OMPD_master_taskloop_simd:
8913     case OMPD_parallel_master_taskloop:
8914     case OMPD_parallel_master_taskloop_simd:
8915     case OMPD_requires:
8916     case OMPD_unknown:
8917     default:
8918       llvm_unreachable("Unexpected directive.");
8919     }
8920   }
8921 
8922   return nullptr;
8923 }
8924 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// \param D the 'declare mapper' declaration to emit a function for.
/// \param CGF if non-null, the emitted mapper is also recorded against the
///        current function in FunctionUDMMap.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; the back-edge incoming value is
  // added after the body is emitted (see PtrNext below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to map
  // types below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // Merge the four possible decayed map types (the tofrom case arrives via
    // the fall-through edge from ToElseBB).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // Also remember which function requested the mapper, so its record can be
  // cleaned up with that function.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9205 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// Control continues at \p ExitBB both when the guarded code is skipped and
/// after it runs; the emitted runtime call is __tgt_push_mapper_component with
/// the total section size and the map type stripped of TO/FROM bits.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Sections of size <= 1 are not treated as arrays; skip straight to ExitBB.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization runs only when the delete bit is NOT set; deletion runs
    // only when it IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9261 
9262 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9263     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9264     llvm::Value *DeviceID,
9265     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9266                                      const OMPLoopDirective &D)>
9267         SizeEmitter) {
9268   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9269   const OMPExecutableDirective *TD = &D;
9270   // Get nested teams distribute kind directive, if any.
9271   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9272     TD = getNestedDistributeDirective(CGM.getContext(), D);
9273   if (!TD)
9274     return;
9275   const auto *LD = cast<OMPLoopDirective>(TD);
9276   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9277                                                      PrePostActionTy &) {
9278     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9279       llvm::Value *Args[] = {DeviceID, NumIterations};
9280       CGF.EmitRuntimeCall(
9281           OMPBuilder.getOrCreateRuntimeFunction(
9282               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9283           Args);
9284     }
9285   };
9286   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9287 }
9288 
/// Emit the offloading sequence for a target-based directive \p D: capture the
/// variables referenced in the region, build the offloading argument arrays,
/// call the appropriate __tgt_target* entry point, and fall back to the host
/// version (\p OutlinedFn) if offloading fails, is disabled, or the if-clause
/// \p IfCond evaluates to false.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the target invocation to be wrapped in an outer
  // task; in that case captured variables must be re-captured inside the task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled by TargetThenGen below and read by
  // ThenGen, which may run inside a task body generated later.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
                                         : OMPRTL___tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(),
              HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types)
  // for every capture, then delegates the actual launch to ThenGen (possibly
  // wrapped in a task when a depend clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Iterate captures, record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays for ThenGen via the shared InputInfo/MapTypesArray.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9582 
/// Recursively scan statement \p S (a function or constructor/destructor body)
/// for target execution directives and emit a device function for each one
/// found, using \p ParentName in the generated kernel name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here (guarded by RequiresDeviceCodegen above); they are
    // listed explicitly so that new kinds trigger a -Wswitch warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, descend into the associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9731 
9732 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9733   // If emitting code for the host, we do not process FD here. Instead we do
9734   // the normal code generation.
9735   if (!CGM.getLangOpts().OpenMPIsDevice) {
9736     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9737       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9738           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9739       // Do not emit device_type(nohost) functions for the host.
9740       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9741         return true;
9742     }
9743     return false;
9744   }
9745 
9746   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9747   // Try to detect target regions in the function.
9748   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9749     StringRef Name = CGM.getMangledName(GD);
9750     scanForTargetRegionsFunctions(FD->getBody(), Name);
9751     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9752         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9753     // Do not emit device_type(nohost) functions for the host.
9754     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9755       return true;
9756   }
9757 
9758   // Do not to emit function if it is not marked as declare target.
9759   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9760          AlreadyEmittedTargetDecls.count(VD) == 0;
9761 }
9762 
9763 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9764   if (!CGM.getLangOpts().OpenMPIsDevice)
9765     return false;
9766 
9767   // Check if there are Ctors/Dtors in this declaration and look for target
9768   // regions in it. We use the complete variant to produce the kernel name
9769   // mangling.
9770   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9771   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9772     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9773       StringRef ParentName =
9774           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9775       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9776     }
9777     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9778       StringRef ParentName =
9779           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9780       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9781     }
9782   }
9783 
9784   // Do not to emit variable if it is not marked as declare target.
9785   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9786       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9787           cast<VarDecl>(GD.getDecl()));
9788   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9789       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9790        HasRequiresUnifiedSharedMemory)) {
9791     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9792     return true;
9793   }
9794   return false;
9795 }
9796 
9797 llvm::Constant *
9798 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9799                                                 const VarDecl *VD) {
9800   assert(VD->getType().isConstant(CGM.getContext()) &&
9801          "Expected constant variable.");
9802   StringRef VarName;
9803   llvm::Constant *Addr;
9804   llvm::GlobalValue::LinkageTypes Linkage;
9805   QualType Ty = VD->getType();
9806   SmallString<128> Buffer;
9807   {
9808     unsigned DeviceID;
9809     unsigned FileID;
9810     unsigned Line;
9811     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9812                              FileID, Line);
9813     llvm::raw_svector_ostream OS(Buffer);
9814     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9815        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9816     VarName = OS.str();
9817   }
9818   Linkage = llvm::GlobalValue::InternalLinkage;
9819   Addr =
9820       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9821                                   getDefaultFirstprivateAddressSpace());
9822   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9823   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9824   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9825   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9826       VarName, Addr, VarSize,
9827       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9828   return Addr;
9829 }
9830 
/// Register the declare-target global variable \p VD (with address \p Addr)
/// in the offload entries table, computing its entry kind, size, and linkage
/// according to its declare target map type ('to' vs. 'link') and whether
/// unified shared memory is required.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when not offloading at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // Plain 'to' mapping: the variable itself is the entry.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only entries are registered with size zero.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Create an artificial constant "<name>_ref" that holds the variable's
      // address and mark it compiler-used so the variable is kept alive.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' clause, or 'to' under unified shared memory: the entry is a
    // pointer-sized indirection variable rather than the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the indirection variable is created on demand; only its
      // name is registered here.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9901 
9902 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9903   if (isa<FunctionDecl>(GD.getDecl()) ||
9904       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9905     return emitTargetFunctions(GD);
9906 
9907   return emitTargetGlobalVariable(GD);
9908 }
9909 
9910 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9911   for (const VarDecl *VD : DeferredGlobalVariables) {
9912     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9913         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9914     if (!Res)
9915       continue;
9916     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9917         !HasRequiresUnifiedSharedMemory) {
9918       CGM.EmitGlobal(VD);
9919     } else {
9920       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9921               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9922                HasRequiresUnifiedSharedMemory)) &&
9923              "Expected link clause or to clause with unified memory.");
9924       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9925     }
9926   }
9927 }
9928 
9929 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9930     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9931   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9932          " Expected target-based directive.");
9933 }
9934 
9935 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9936   for (const OMPClause *Clause : D->clauselists()) {
9937     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9938       HasRequiresUnifiedSharedMemory = true;
9939     } else if (const auto *AC =
9940                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9941       switch (AC->getAtomicDefaultMemOrderKind()) {
9942       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9943         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9944         break;
9945       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9946         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9947         break;
9948       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9949         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9950         break;
9951       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9952         break;
9953       }
9954     }
9955   }
9956 }
9957 
// Returns the default atomic memory ordering, as recorded by
// processRequiresDirective() from an 'atomic_default_mem_order' clause of a
// 'requires' directive (or the runtime's initial default otherwise).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9961 
9962 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9963                                                        LangAS &AS) {
9964   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9965     return false;
9966   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9967   switch(A->getAllocatorType()) {
9968   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9969   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9970   // Not supported, fallback to the default mem space.
9971   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9972   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9973   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9974   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9975   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9976   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9977   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9978     AS = LangAS::Default;
9979     return true;
9980   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9981     llvm_unreachable("Expected predefined allocator for the variables with the "
9982                      "static storage.");
9983   }
9984   return false;
9985 }
9986 
// Returns true if a 'requires' directive with the 'unified_shared_memory'
// clause was seen (recorded by processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9990 
9991 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9992     CodeGenModule &CGM)
9993     : CGM(CGM) {
9994   if (CGM.getLangOpts().OpenMPIsDevice) {
9995     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9996     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9997   }
9998 }
9999 
10000 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10001   if (CGM.getLangOpts().OpenMPIsDevice)
10002     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10003 }
10004 
10005 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10006   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10007     return true;
10008 
10009   const auto *D = cast<FunctionDecl>(GD.getDecl());
10010   // Do not to emit function if it is marked as declare target as it was already
10011   // emitted.
10012   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10013     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10014       if (auto *F = dyn_cast_or_null<llvm::Function>(
10015               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10016         return !F->isDeclaration();
10017       return false;
10018     }
10019     return true;
10020   }
10021 
10022   return !AlreadyEmittedTargetDecls.insert(D).second;
10023 }
10024 
10025 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10026   // If we don't have entries or if we are emitting code for the device, we
10027   // don't need to do anything.
10028   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10029       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10030       (OffloadEntriesInfoManager.empty() &&
10031        !HasEmittedDeclareTargetRegion &&
10032        !HasEmittedTargetRegion))
10033     return nullptr;
10034 
10035   // Create and register the function that handles the requires directives.
10036   ASTContext &C = CGM.getContext();
10037 
10038   llvm::Function *RequiresRegFn;
10039   {
10040     CodeGenFunction CGF(CGM);
10041     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10042     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10043     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10044     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10045     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10046     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10047     // TODO: check for other requires clauses.
10048     // The requires directive takes effect only when a target region is
10049     // present in the compilation unit. Otherwise it is ignored and not
10050     // passed to the runtime. This avoids the runtime from throwing an error
10051     // for mismatching requires clauses across compilation units that don't
10052     // contain at least 1 target region.
10053     assert((HasEmittedTargetRegion ||
10054             HasEmittedDeclareTargetRegion ||
10055             !OffloadEntriesInfoManager.empty()) &&
10056            "Target or declare target region expected.");
10057     if (HasRequiresUnifiedSharedMemory)
10058       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10059     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10060                             CGM.getModule(), OMPRTL___tgt_register_requires),
10061                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10062     CGF.FinishFunction();
10063   }
10064   return RequiresRegFn;
10065 }
10066 
10067 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10068                                     const OMPExecutableDirective &D,
10069                                     SourceLocation Loc,
10070                                     llvm::Function *OutlinedFn,
10071                                     ArrayRef<llvm::Value *> CapturedVars) {
10072   if (!CGF.HaveInsertPoint())
10073     return;
10074 
10075   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10076   CodeGenFunction::RunCleanupsScope Scope(CGF);
10077 
10078   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10079   llvm::Value *Args[] = {
10080       RTLoc,
10081       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10082       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10083   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10084   RealArgs.append(std::begin(Args), std::end(Args));
10085   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10086 
10087   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10088       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10089   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10090 }
10091 
10092 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10093                                          const Expr *NumTeams,
10094                                          const Expr *ThreadLimit,
10095                                          SourceLocation Loc) {
10096   if (!CGF.HaveInsertPoint())
10097     return;
10098 
10099   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10100 
10101   llvm::Value *NumTeamsVal =
10102       NumTeams
10103           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10104                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10105           : CGF.Builder.getInt32(0);
10106 
10107   llvm::Value *ThreadLimitVal =
10108       ThreadLimit
10109           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10110                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10111           : CGF.Builder.getInt32(0);
10112 
10113   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10114   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10115                                      ThreadLimitVal};
10116   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10117                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10118                       PushNumTeamsArgs);
10119 }
10120 
// Emit a 'target data' region: a __tgt_target_data_begin call, the region
// body, and a matching __tgt_target_data_end call, honoring the optional
// 'if' and 'device' clauses. When the region privatizes device pointers
// (Info.CaptureDeviceAddrMap non-empty) the body is emitted twice: once with
// privatization inside the "then" branch and once without it in the "else"
// branch; otherwise the body is emitted exactly once between the two runtime
// calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; default to OMP_DEVICEID_UNDEF when no 'device'
    // clause was given.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // The arrays filled in by BeginThenGen must be available here.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally if an 'if' clause is present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (again conditionally under the 'if' clause).
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10249 
// Emit a standalone data-movement directive ('target enter data',
// 'target exit data', or 'target update') as a single call to the matching
// __tgt_target_data_* runtime entry point, honoring 'if', 'device', 'nowait',
// and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are filled in by TargetThenGen below and read by
  // ThenGen, which may run later inside a task body (depend clause).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; default to OMP_DEVICEID_UNDEF when no 'device'
    // clause was given.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // All remaining directive kinds were excluded by the assert above; the
    // exhaustive list keeps -Wswitch coverage for new enumerators.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays first, then run ThenGen either inside a
  // target task (when a 'depend' clause is present) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array locations to the captures read by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the runtime call is guarded; the else branch is empty.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10415 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// For linear parameters this is emitted as the stride/step of the
    /// mangled vector name ('s<N>'/'l<N>' components).
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; emitted as an 'a<N>' mangling component
    /// when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
10426 
10427 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10428                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10429   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10430   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10431   // of that clause. The VLEN value must be power of 2.
10432   // In other case the notion of the function`s "characteristic data type" (CDT)
10433   // is used to compute the vector length.
10434   // CDT is defined in the following order:
10435   //   a) For non-void function, the CDT is the return type.
10436   //   b) If the function has any non-uniform, non-linear parameters, then the
10437   //   CDT is the type of the first such parameter.
10438   //   c) If the CDT determined by a) or b) above is struct, union, or class
10439   //   type which is pass-by-value (except for the type that maps to the
10440   //   built-in complex data type), the characteristic data type is int.
10441   //   d) If none of the above three cases is applicable, the CDT is int.
10442   // The VLEN is then determined based on the CDT and the size of vector
10443   // register of that ISA for which current vector version is generated. The
10444   // VLEN is computed using the formula below:
10445   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10446   // where vector register size specified in section 3.2.1 Registers and the
10447   // Stack Frame of original AMD64 ABI document.
10448   QualType RetType = FD->getReturnType();
10449   if (RetType.isNull())
10450     return 0;
10451   ASTContext &C = FD->getASTContext();
10452   QualType CDT;
10453   if (!RetType.isNull() && !RetType->isVoidType()) {
10454     CDT = RetType;
10455   } else {
10456     unsigned Offset = 0;
10457     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10458       if (ParamAttrs[Offset].Kind == Vector)
10459         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10460       ++Offset;
10461     }
10462     if (CDT.isNull()) {
10463       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10464         if (ParamAttrs[I + Offset].Kind == Vector) {
10465           CDT = FD->getParamDecl(I)->getType();
10466           break;
10467         }
10468       }
10469     }
10470   }
10471   if (CDT.isNull())
10472     CDT = C.IntTy;
10473   CDT = CDT->getCanonicalTypeUnqualified();
10474   if (CDT->isRecordType() || CDT->isUnionType())
10475     CDT = C.IntTy;
10476   return C.getTypeSize(CDT);
10477 }
10478 
10479 static void
10480 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10481                            const llvm::APSInt &VLENVal,
10482                            ArrayRef<ParamAttrTy> ParamAttrs,
10483                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10484   struct ISADataTy {
10485     char ISA;
10486     unsigned VecRegSize;
10487   };
10488   ISADataTy ISAData[] = {
10489       {
10490           'b', 128
10491       }, // SSE
10492       {
10493           'c', 256
10494       }, // AVX
10495       {
10496           'd', 256
10497       }, // AVX2
10498       {
10499           'e', 512
10500       }, // AVX512
10501   };
10502   llvm::SmallVector<char, 2> Masked;
10503   switch (State) {
10504   case OMPDeclareSimdDeclAttr::BS_Undefined:
10505     Masked.push_back('N');
10506     Masked.push_back('M');
10507     break;
10508   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10509     Masked.push_back('N');
10510     break;
10511   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10512     Masked.push_back('M');
10513     break;
10514   }
10515   for (char Mask : Masked) {
10516     for (const ISADataTy &Data : ISAData) {
10517       SmallString<256> Buffer;
10518       llvm::raw_svector_ostream Out(Buffer);
10519       Out << "_ZGV" << Data.ISA << Mask;
10520       if (!VLENVal) {
10521         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10522         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10523         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10524       } else {
10525         Out << VLENVal;
10526       }
10527       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10528         switch (ParamAttr.Kind){
10529         case LinearWithVarStride:
10530           Out << 's' << ParamAttr.StrideOrArg;
10531           break;
10532         case Linear:
10533           Out << 'l';
10534           if (ParamAttr.StrideOrArg != 1)
10535             Out << ParamAttr.StrideOrArg;
10536           break;
10537         case Uniform:
10538           Out << 'u';
10539           break;
10540         case Vector:
10541           Out << 'v';
10542           break;
10543         }
10544         if (!!ParamAttr.Alignment)
10545           Out << 'a' << ParamAttr.Alignment;
10546       }
10547       Out << '_' << Fn->getName();
10548       Fn->addFnAttr(Out.str());
10549     }
10550   }
10551 }
10552 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10558 
10559 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10560 ///
10561 /// TODO: Need to implement the behavior for reference marked with a
10562 /// var or no linear modifiers (1.b in the section). For this, we
10563 /// need to extend ParamKindTy to support the linear modifiers.
10564 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10565   QT = QT.getCanonicalType();
10566 
10567   if (QT->isVoidType())
10568     return false;
10569 
10570   if (Kind == ParamKindTy::Uniform)
10571     return false;
10572 
10573   if (Kind == ParamKindTy::Linear)
10574     return false;
10575 
10576   // TODO: Handle linear references with modifiers
10577 
10578   if (Kind == ParamKindTy::LinearWithVarStride)
10579     return false;
10580 
10581   return true;
10582 }
10583 
10584 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10585 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10586   QT = QT.getCanonicalType();
10587   unsigned Size = C.getTypeSize(QT);
10588 
10589   // Only scalars and complex within 16 bytes wide set PVB to true.
10590   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10591     return false;
10592 
10593   if (QT->isFloatingType())
10594     return true;
10595 
10596   if (QT->isIntegerType())
10597     return true;
10598 
10599   if (QT->isPointerType())
10600     return true;
10601 
10602   // TODO: Add support for complex types (section 3.1.2, item 2).
10603 
10604   return false;
10605 }
10606 
10607 /// Computes the lane size (LS) of a return type or of an input parameter,
10608 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10609 /// TODO: Add support for references, section 3.2.1, item 1.
10610 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10611   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10612     QualType PTy = QT.getCanonicalType()->getPointeeType();
10613     if (getAArch64PBV(PTy, C))
10614       return C.getTypeSize(PTy);
10615   }
10616   if (getAArch64PBV(QT, C))
10617     return C.getTypeSize(QT);
10618 
10619   return C.getTypeSize(C.getUIntPtrType());
10620 }
10621 
10622 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10623 // signature of the scalar function, as defined in 3.2.2 of the
10624 // AAVFABI.
10625 static std::tuple<unsigned, unsigned, bool>
10626 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10627   QualType RetType = FD->getReturnType().getCanonicalType();
10628 
10629   ASTContext &C = FD->getASTContext();
10630 
10631   bool OutputBecomesInput = false;
10632 
10633   llvm::SmallVector<unsigned, 8> Sizes;
10634   if (!RetType->isVoidType()) {
10635     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10636     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10637       OutputBecomesInput = true;
10638   }
10639   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10640     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10641     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10642   }
10643 
10644   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10645   // The LS of a function parameter / return value can only be a power
10646   // of 2, starting from 8 bits, up to 128.
10647   assert(std::all_of(Sizes.begin(), Sizes.end(),
10648                      [](unsigned Size) {
10649                        return Size == 8 || Size == 16 || Size == 32 ||
10650                               Size == 64 || Size == 128;
10651                      }) &&
10652          "Invalid size");
10653 
10654   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10655                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10656                          OutputBecomesInput);
10657 }
10658 
10659 /// Mangle the parameter part of the vector function name according to
10660 /// their OpenMP classification. The mangling function is defined in
10661 /// section 3.5 of the AAVFABI.
10662 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10663   SmallString<256> Buffer;
10664   llvm::raw_svector_ostream Out(Buffer);
10665   for (const auto &ParamAttr : ParamAttrs) {
10666     switch (ParamAttr.Kind) {
10667     case LinearWithVarStride:
10668       Out << "ls" << ParamAttr.StrideOrArg;
10669       break;
10670     case Linear:
10671       Out << 'l';
10672       // Don't print the step value if it is not present or if it is
10673       // equal to 1.
10674       if (ParamAttr.StrideOrArg != 1)
10675         Out << ParamAttr.StrideOrArg;
10676       break;
10677     case Uniform:
10678       Out << 'u';
10679       break;
10680     case Vector:
10681       Out << 'v';
10682       break;
10683     }
10684 
10685     if (!!ParamAttr.Alignment)
10686       Out << 'a' << ParamAttr.Alignment;
10687   }
10688 
10689   return std::string(Out.str());
10690 }
10691 
10692 // Function used to add the attribute. The parameter `VLEN` is
10693 // templated to allow the use of "x" when targeting scalable functions
10694 // for SVE.
10695 template <typename T>
10696 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10697                                  char ISA, StringRef ParSeq,
10698                                  StringRef MangledName, bool OutputBecomesInput,
10699                                  llvm::Function *Fn) {
10700   SmallString<256> Buffer;
10701   llvm::raw_svector_ostream Out(Buffer);
10702   Out << Prefix << ISA << LMask << VLEN;
10703   if (OutputBecomesInput)
10704     Out << "v";
10705   Out << ParSeq << "_" << MangledName;
10706   Fn->addFnAttr(Out.str());
10707 }
10708 
10709 // Helper function to generate the Advanced SIMD names depending on
10710 // the value of the NDS when simdlen is not present.
10711 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10712                                       StringRef Prefix, char ISA,
10713                                       StringRef ParSeq, StringRef MangledName,
10714                                       bool OutputBecomesInput,
10715                                       llvm::Function *Fn) {
10716   switch (NDS) {
10717   case 8:
10718     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10719                          OutputBecomesInput, Fn);
10720     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10721                          OutputBecomesInput, Fn);
10722     break;
10723   case 16:
10724     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10725                          OutputBecomesInput, Fn);
10726     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10727                          OutputBecomesInput, Fn);
10728     break;
10729   case 32:
10730     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10731                          OutputBecomesInput, Fn);
10732     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10733                          OutputBecomesInput, Fn);
10734     break;
10735   case 64:
10736   case 128:
10737     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10738                          OutputBecomesInput, Fn);
10739     break;
10740   default:
10741     llvm_unreachable("Scalar type is too wide.");
10742   }
10743 }
10744 
10745 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10746 static void emitAArch64DeclareSimdFunction(
10747     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10748     ArrayRef<ParamAttrTy> ParamAttrs,
10749     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10750     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10751 
10752   // Get basic data for building the vector signature.
10753   const auto Data = getNDSWDS(FD, ParamAttrs);
10754   const unsigned NDS = std::get<0>(Data);
10755   const unsigned WDS = std::get<1>(Data);
10756   const bool OutputBecomesInput = std::get<2>(Data);
10757 
10758   // Check the values provided via `simdlen` by the user.
10759   // 1. A `simdlen(1)` doesn't produce vector signatures,
10760   if (UserVLEN == 1) {
10761     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10762         DiagnosticsEngine::Warning,
10763         "The clause simdlen(1) has no effect when targeting aarch64.");
10764     CGM.getDiags().Report(SLoc, DiagID);
10765     return;
10766   }
10767 
10768   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10769   // Advanced SIMD output.
10770   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10771     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10772         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10773                                     "power of 2 when targeting Advanced SIMD.");
10774     CGM.getDiags().Report(SLoc, DiagID);
10775     return;
10776   }
10777 
10778   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10779   // limits.
10780   if (ISA == 's' && UserVLEN != 0) {
10781     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10782       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10783           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10784                                       "lanes in the architectural constraints "
10785                                       "for SVE (min is 128-bit, max is "
10786                                       "2048-bit, by steps of 128-bit)");
10787       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10788       return;
10789     }
10790   }
10791 
10792   // Sort out parameter sequence.
10793   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10794   StringRef Prefix = "_ZGV";
10795   // Generate simdlen from user input (if any).
10796   if (UserVLEN) {
10797     if (ISA == 's') {
10798       // SVE generates only a masked function.
10799       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10800                            OutputBecomesInput, Fn);
10801     } else {
10802       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10803       // Advanced SIMD generates one or two functions, depending on
10804       // the `[not]inbranch` clause.
10805       switch (State) {
10806       case OMPDeclareSimdDeclAttr::BS_Undefined:
10807         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10808                              OutputBecomesInput, Fn);
10809         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10810                              OutputBecomesInput, Fn);
10811         break;
10812       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10813         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10814                              OutputBecomesInput, Fn);
10815         break;
10816       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10817         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10818                              OutputBecomesInput, Fn);
10819         break;
10820       }
10821     }
10822   } else {
10823     // If no user simdlen is provided, follow the AAVFABI rules for
10824     // generating the vector length.
10825     if (ISA == 's') {
10826       // SVE, section 3.4.1, item 1.
10827       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10828                            OutputBecomesInput, Fn);
10829     } else {
10830       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10831       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10832       // two vector names depending on the use of the clause
10833       // `[not]inbranch`.
10834       switch (State) {
10835       case OMPDeclareSimdDeclAttr::BS_Undefined:
10836         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10837                                   OutputBecomesInput, Fn);
10838         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10839                                   OutputBecomesInput, Fn);
10840         break;
10841       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10842         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10843                                   OutputBecomesInput, Fn);
10844         break;
10845       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10846         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10847                                   OutputBecomesInput, Fn);
10848         break;
10849       }
10850     }
10851   }
10852 }
10853 
10854 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10855                                               llvm::Function *Fn) {
10856   ASTContext &C = CGM.getContext();
10857   FD = FD->getMostRecentDecl();
10858   // Map params to their positions in function decl.
10859   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10860   if (isa<CXXMethodDecl>(FD))
10861     ParamPositions.try_emplace(FD, 0);
10862   unsigned ParamPos = ParamPositions.size();
10863   for (const ParmVarDecl *P : FD->parameters()) {
10864     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10865     ++ParamPos;
10866   }
10867   while (FD) {
10868     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10869       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10870       // Mark uniform parameters.
10871       for (const Expr *E : Attr->uniforms()) {
10872         E = E->IgnoreParenImpCasts();
10873         unsigned Pos;
10874         if (isa<CXXThisExpr>(E)) {
10875           Pos = ParamPositions[FD];
10876         } else {
10877           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10878                                 ->getCanonicalDecl();
10879           Pos = ParamPositions[PVD];
10880         }
10881         ParamAttrs[Pos].Kind = Uniform;
10882       }
10883       // Get alignment info.
10884       auto NI = Attr->alignments_begin();
10885       for (const Expr *E : Attr->aligneds()) {
10886         E = E->IgnoreParenImpCasts();
10887         unsigned Pos;
10888         QualType ParmTy;
10889         if (isa<CXXThisExpr>(E)) {
10890           Pos = ParamPositions[FD];
10891           ParmTy = E->getType();
10892         } else {
10893           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10894                                 ->getCanonicalDecl();
10895           Pos = ParamPositions[PVD];
10896           ParmTy = PVD->getType();
10897         }
10898         ParamAttrs[Pos].Alignment =
10899             (*NI)
10900                 ? (*NI)->EvaluateKnownConstInt(C)
10901                 : llvm::APSInt::getUnsigned(
10902                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10903                           .getQuantity());
10904         ++NI;
10905       }
10906       // Mark linear parameters.
10907       auto SI = Attr->steps_begin();
10908       auto MI = Attr->modifiers_begin();
10909       for (const Expr *E : Attr->linears()) {
10910         E = E->IgnoreParenImpCasts();
10911         unsigned Pos;
10912         // Rescaling factor needed to compute the linear parameter
10913         // value in the mangled name.
10914         unsigned PtrRescalingFactor = 1;
10915         if (isa<CXXThisExpr>(E)) {
10916           Pos = ParamPositions[FD];
10917         } else {
10918           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10919                                 ->getCanonicalDecl();
10920           Pos = ParamPositions[PVD];
10921           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10922             PtrRescalingFactor = CGM.getContext()
10923                                      .getTypeSizeInChars(P->getPointeeType())
10924                                      .getQuantity();
10925         }
10926         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10927         ParamAttr.Kind = Linear;
10928         // Assuming a stride of 1, for `linear` without modifiers.
10929         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10930         if (*SI) {
10931           Expr::EvalResult Result;
10932           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10933             if (const auto *DRE =
10934                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10935               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10936                 ParamAttr.Kind = LinearWithVarStride;
10937                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10938                     ParamPositions[StridePVD->getCanonicalDecl()]);
10939               }
10940             }
10941           } else {
10942             ParamAttr.StrideOrArg = Result.Val.getInt();
10943           }
10944         }
10945         // If we are using a linear clause on a pointer, we need to
10946         // rescale the value of linear_step with the byte size of the
10947         // pointee type.
10948         if (Linear == ParamAttr.Kind)
10949           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10950         ++SI;
10951         ++MI;
10952       }
10953       llvm::APSInt VLENVal;
10954       SourceLocation ExprLoc;
10955       const Expr *VLENExpr = Attr->getSimdlen();
10956       if (VLENExpr) {
10957         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10958         ExprLoc = VLENExpr->getExprLoc();
10959       }
10960       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10961       if (CGM.getTriple().isX86()) {
10962         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10963       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10964         unsigned VLEN = VLENVal.getExtValue();
10965         StringRef MangledName = Fn->getName();
10966         if (CGM.getTarget().hasFeature("sve"))
10967           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10968                                          MangledName, 's', 128, Fn, ExprLoc);
10969         if (CGM.getTarget().hasFeature("neon"))
10970           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10971                                          MangledName, 'n', 128, Fn, ExprLoc);
10972       }
10973     }
10974     FD = FD->getPreviousDecl();
10975   }
10976 }
10977 
10978 namespace {
10979 /// Cleanup action for doacross support.
10980 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10981 public:
10982   static const int DoacrossFinArgs = 2;
10983 
10984 private:
10985   llvm::FunctionCallee RTLFn;
10986   llvm::Value *Args[DoacrossFinArgs];
10987 
10988 public:
10989   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10990                     ArrayRef<llvm::Value *> CallArgs)
10991       : RTLFn(RTLFn) {
10992     assert(CallArgs.size() == DoacrossFinArgs);
10993     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10994   }
10995   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10996     if (!CGF.HaveInsertPoint())
10997       return;
10998     CGF.EmitRuntimeCall(RTLFn, Args);
10999   }
11000 };
11001 } // namespace
11002 
/// Emit initialization of a doacross loop nest: builds an array of kmp_dim
/// descriptors (one per collapsed loop), calls __kmpc_doacross_init with it,
/// and registers a cleanup that calls __kmpc_doacross_fini on scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy; later
  // calls reuse the cached type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per collapsed loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the array; `lo` stays 0, only `up` and `st` are set.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) to run on both the normal
  // and the exceptional exit path.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11073 
11074 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11075                                           const OMPDependClause *C) {
11076   QualType Int64Ty =
11077       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11078   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11079   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11080       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11081   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11082   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11083     const Expr *CounterVal = C->getLoopData(I);
11084     assert(CounterVal);
11085     llvm::Value *CntVal = CGF.EmitScalarConversion(
11086         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11087         CounterVal->getExprLoc());
11088     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11089                           /*Volatile=*/false, Int64Ty);
11090   }
11091   llvm::Value *Args[] = {
11092       emitUpdateLocation(CGF, C->getBeginLoc()),
11093       getThreadID(CGF, C->getBeginLoc()),
11094       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11095   llvm::FunctionCallee RTLFn;
11096   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11097     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11098                                                   OMPRTL___kmpc_doacross_post);
11099   } else {
11100     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11101     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11102                                                   OMPRTL___kmpc_doacross_wait);
11103   }
11104   CGF.EmitRuntimeCall(RTLFn, Args);
11105 }
11106 
11107 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11108                                llvm::FunctionCallee Callee,
11109                                ArrayRef<llvm::Value *> Args) const {
11110   assert(Loc.isValid() && "Outlined function call location must be valid.");
11111   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11112 
11113   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11114     if (Fn->doesNotThrow()) {
11115       CGF.EmitNounwindRuntimeCall(Fn, Args);
11116       return;
11117     }
11118   }
11119   CGF.EmitRuntimeCall(Callee, Args);
11120 }
11121 
/// Emit a call to an OpenMP outlined function. Thin wrapper that forwards
/// to the common emitCall helper (which attaches an artificial debug
/// location and uses the nounwind fast path when possible).
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11127 
11128 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11129   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11130     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11131       HasEmittedDeclareTargetRegion = true;
11132 }
11133 
/// Return the address to use for \p NativeParam inside the outlined
/// function. In this base implementation the native parameter's own local
/// storage is returned and \p TargetParam is unused; presumably device
/// runtimes override this to translate addresses -- confirm with subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11139 
11140 namespace {
11141 /// Cleanup action for allocate support.
11142 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11143 public:
11144   static const int CleanupArgs = 3;
11145 
11146 private:
11147   llvm::FunctionCallee RTLFn;
11148   llvm::Value *Args[CleanupArgs];
11149 
11150 public:
11151   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11152                        ArrayRef<llvm::Value *> CallArgs)
11153       : RTLFn(RTLFn) {
11154     assert(CallArgs.size() == CleanupArgs &&
11155            "Size of arguments does not match.");
11156     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11157   }
11158   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11159     if (!CGF.HaveInsertPoint())
11160       return;
11161     CGF.EmitRuntimeCall(RTLFn, Args);
11162   }
11163 };
11164 } // namespace
11165 
/// Return the address of a local variable declared with an OpenMP
/// `allocate` attribute: the storage is obtained from __kmpc_alloc and a
/// cleanup is registered to release it via __kmpc_free. Returns an invalid
/// Address when the default (stack) allocation should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified type: the size is only known at run time.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Statically-sized type: round the size up to the alignment at
    // compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy)
;
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  // Arguments in the order __kmpc_alloc expects: thread id, size, allocator.
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Schedule __kmpc_free(thread, ptr, allocator) for both the normal and
  // the exceptional exit path.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11222 
11223 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11224     CodeGenModule &CGM, const OMPLoopDirective &S)
11225     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11226   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11227   if (!NeedToPush)
11228     return;
11229   NontemporalDeclsSet &DS =
11230       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11231   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11232     for (const Stmt *Ref : C->private_refs()) {
11233       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11234       const ValueDecl *VD;
11235       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11236         VD = DRE->getDecl();
11237       } else {
11238         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11239         assert((ME->isImplicitCXXThis() ||
11240                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11241                "Expected member of current class.");
11242         VD = ME->getMemberDecl();
11243       }
11244       DS.insert(VD);
11245     }
11246   }
11247 }
11248 
11249 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11250   if (!NeedToPush)
11251     return;
11252   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11253 }
11254 
11255 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11256   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11257 
11258   return llvm::any_of(
11259       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11260       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11261 }
11262 
11263 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11264     const OMPExecutableDirective &S,
11265     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11266     const {
11267   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11268   // Vars in target/task regions must be excluded completely.
11269   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11270       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11271     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11272     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11273     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11274     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11275       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11276         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11277     }
11278   }
11279   // Exclude vars in private clauses.
11280   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11281     for (const Expr *Ref : C->varlists()) {
11282       if (!Ref->getType()->isScalarType())
11283         continue;
11284       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11285       if (!DRE)
11286         continue;
11287       NeedToCheckForLPCs.insert(DRE->getDecl());
11288     }
11289   }
11290   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11291     for (const Expr *Ref : C->varlists()) {
11292       if (!Ref->getType()->isScalarType())
11293         continue;
11294       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11295       if (!DRE)
11296         continue;
11297       NeedToCheckForLPCs.insert(DRE->getDecl());
11298     }
11299   }
11300   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11301     for (const Expr *Ref : C->varlists()) {
11302       if (!Ref->getType()->isScalarType())
11303         continue;
11304       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11305       if (!DRE)
11306         continue;
11307       NeedToCheckForLPCs.insert(DRE->getDecl());
11308     }
11309   }
11310   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11311     for (const Expr *Ref : C->varlists()) {
11312       if (!Ref->getType()->isScalarType())
11313         continue;
11314       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11315       if (!DRE)
11316         continue;
11317       NeedToCheckForLPCs.insert(DRE->getDecl());
11318     }
11319   }
11320   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11321     for (const Expr *Ref : C->varlists()) {
11322       if (!Ref->getType()->isScalarType())
11323         continue;
11324       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11325       if (!DRE)
11326         continue;
11327       NeedToCheckForLPCs.insert(DRE->getDecl());
11328     }
11329   }
11330   for (const Decl *VD : NeedToCheckForLPCs) {
11331     for (const LastprivateConditionalData &Data :
11332          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11333       if (Data.DeclToUniqueName.count(VD) > 0) {
11334         if (!Data.Disabled)
11335           NeedToAddForLPCsAsDisabled.insert(VD);
11336         break;
11337       }
11338     }
11339   }
11340 }
11341 
/// Push a lastprivate-conditional region for \p S if it has at least one
/// `lastprivate(conditional:)` clause and OpenMP >= 5.0; otherwise this
/// RAII is a no-op (Action stays DoNotPush).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Record a unique name per conditional-lastprivate variable; these
    // names (from generateUniqueName with "pl_cond") identify the tracked
    // values later in codegen.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and enclosing function for the
  // checks/updates performed while the region is active.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11373 
11374 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11375     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11376     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11377   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11378   if (CGM.getLangOpts().OpenMP < 50)
11379     return;
11380   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11381   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11382   if (!NeedToAddForLPCsAsDisabled.empty()) {
11383     Action = ActionToDo::DisableLastprivateConditional;
11384     LastprivateConditionalData &Data =
11385         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11386     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11387       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11388     Data.Fn = CGF.CurFn;
11389     Data.Disabled = true;
11390   }
11391 }
11392 
// Factory: builds the "disable inner analysis" flavor of the RAII via the
// constructor that takes no iteration-variable lvalue.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11398 
11399 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11400   if (CGM.getLangOpts().OpenMP < 50)
11401     return;
11402   if (Action == ActionToDo::DisableLastprivateConditional) {
11403     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11404            "Expected list of disabled private vars.");
11405     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11406   }
11407   if (Action == ActionToDo::PushAsLastprivateConditional) {
11408     assert(
11409         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11410         "Expected list of lastprivate conditional vars.");
11411     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11412   }
11413 }
11414 
// Allocates (or reuses) the per-function { value, Fired } record for a
// lastprivate conditional variable, resets its Fired flag to 0, and returns
// the address of the value field for use as the private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache: VD -> (record type, value field, Fired field, base).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First request for VD in this function: build an implicit record
    //   struct { <VD's non-reference type> value; char Fired; };
    // NOTE: the "lasprivate" spelling is long-standing and appears in the
    // generated IR type name; it is kept as-is deliberately.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    // The temporary uses VD's own alignment so the value field is usable as
    // the variable itself.
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: unpack the previously built record info.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the Fired flag: the variable has not been updated yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11449 
11450 namespace {
11451 /// Checks if the lastprivate conditional variable is referenced in LHS.
11452 class LastprivateConditionalRefChecker final
11453     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11454   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11455   const Expr *FoundE = nullptr;
11456   const Decl *FoundD = nullptr;
11457   StringRef UniqueDeclName;
11458   LValue IVLVal;
11459   llvm::Function *FoundFn = nullptr;
11460   SourceLocation Loc;
11461 
11462 public:
11463   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11464     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11465          llvm::reverse(LPM)) {
11466       auto It = D.DeclToUniqueName.find(E->getDecl());
11467       if (It == D.DeclToUniqueName.end())
11468         continue;
11469       if (D.Disabled)
11470         return false;
11471       FoundE = E;
11472       FoundD = E->getDecl()->getCanonicalDecl();
11473       UniqueDeclName = It->second;
11474       IVLVal = D.IVLVal;
11475       FoundFn = D.Fn;
11476       break;
11477     }
11478     return FoundE == E;
11479   }
11480   bool VisitMemberExpr(const MemberExpr *E) {
11481     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11482       return false;
11483     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11484          llvm::reverse(LPM)) {
11485       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11486       if (It == D.DeclToUniqueName.end())
11487         continue;
11488       if (D.Disabled)
11489         return false;
11490       FoundE = E;
11491       FoundD = E->getMemberDecl()->getCanonicalDecl();
11492       UniqueDeclName = It->second;
11493       IVLVal = D.IVLVal;
11494       FoundFn = D.Fn;
11495       break;
11496     }
11497     return FoundE == E;
11498   }
11499   bool VisitStmt(const Stmt *S) {
11500     for (const Stmt *Child : S->children()) {
11501       if (!Child)
11502         continue;
11503       if (const auto *E = dyn_cast<Expr>(Child))
11504         if (!E->isGLValue())
11505           continue;
11506       if (Visit(Child))
11507         return true;
11508     }
11509     return false;
11510   }
11511   explicit LastprivateConditionalRefChecker(
11512       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11513       : LPM(LPM) {}
11514   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11515   getFoundData() const {
11516     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11517   }
11518 };
11519 } // namespace
11520 
// Emits the conditional-update sequence for one lastprivate conditional var:
// inside a critical region (named after UniqueDeclName), if the globally
// stored last iteration number is <= the current one, both the stored
// iteration number and the stored value are refreshed from the private copy.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11607 
// Called for the LHS of an assignment: if it names a lastprivate conditional
// variable of some enclosing region, emits either the full compare-and-update
// (same function) or just sets the Fired flag of the outer record (inner
// parallel region, handled later by checkAndEmitSharedLastprivateConditional).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // The private copy is the value field of the wrapping record; cast its
    // address to the record type to reach the Fired flag next to it.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: several threads of the inner region may fire the flag.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11650 
11651 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11652     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11653     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11654   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11655     return;
11656   auto Range = llvm::reverse(LastprivateConditionalStack);
11657   auto It = llvm::find_if(
11658       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11659   if (It == Range.end() || It->Fn != CGF.CurFn)
11660     return;
11661   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11662   assert(LPCI != LastprivateConditionalToTypes.end() &&
11663          "Lastprivates must be registered already.");
11664   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11665   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11666   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11667   for (const auto &Pair : It->DeclToUniqueName) {
11668     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11669     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11670       continue;
11671     auto I = LPCI->getSecond().find(Pair.first);
11672     assert(I != LPCI->getSecond().end() &&
11673            "Lastprivate must be rehistered already.");
11674     // bool Cmp = priv_a.Fired != 0;
11675     LValue BaseLVal = std::get<3>(I->getSecond());
11676     LValue FiredLVal =
11677         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11678     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11679     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11680     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11681     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11682     // if (Cmp) {
11683     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11684     CGF.EmitBlock(ThenBB);
11685     Address Addr = CGF.GetAddrOfLocalVar(VD);
11686     LValue LVal;
11687     if (VD->getType()->isReferenceType())
11688       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11689                                            AlignmentSource::Decl);
11690     else
11691       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11692                                 AlignmentSource::Decl);
11693     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11694                                      D.getBeginLoc());
11695     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11696     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11697     // }
11698   }
11699 }
11700 
11701 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11702     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11703     SourceLocation Loc) {
11704   if (CGF.getLangOpts().OpenMP < 50)
11705     return;
11706   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11707   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11708          "Unknown lastprivate conditional variable.");
11709   StringRef UniqueName = It->second;
11710   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11711   // The variable was not updated in the region - exit.
11712   if (!GV)
11713     return;
11714   LValue LPLVal = CGF.MakeAddrLValue(
11715       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11716   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11717   CGF.EmitStoreOfScalar(Res, PrivLVal);
11718 }
11719 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides (SIMD-only mode, -fopenmp-simd).
// The entry points below all trap: they correspond to constructs that need
// the full OpenMP runtime, so they are never expected to be reached in
// SIMD-only mode.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11887 
// Reductions are the one construct with real support in SIMD-only mode, but
// only in their "simple" (runtime-free) form; delegate to the base
// implementation for the actual codegen.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11896 
// More SIMD-only-mode overrides that trap: task reductions, cancellation and
// target-related entry points all require the full OpenMP runtime.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11964 
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Always returns false: no target-specific handling applies in SIMD-only
  // mode, so the caller proceeds with default (host) emission.
  return false;
}
11968 
// Remaining SIMD-only-mode overrides that trap: teams, target-data, doacross
// and parameter-translation entry points all require the full OpenMP runtime.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12019