xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
/// It records the region kind, the body code generator, the directive kind
/// and the HasCancel flag; derived classes supply the thread-id variable.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
  /// Creates region info attached to the captured statement \p CS.
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
  /// Creates region info that is not attached to a captured statement.
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
  /// Hook for emitting a switching point of an untied task. The base
  /// implementation does nothing.
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
  /// Returns the region kind given at construction.
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
  /// Returns the directive kind given at construction.
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
  /// Returns the HasCancel flag given at construction.
94   bool hasCancel() const { return HasCancel; }
95 
  /// LLVM-style RTTI support: every captured-statement info whose kind is
  /// CR_OpenMP is treated as an OpenMP region info.
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
  /// Kind of this region; used by the classof() checks of derived classes.
103   CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the region body.
104   RegionCodeGenTy CodeGen;
  /// OpenMP directive this region was created for.
105   OpenMPDirectiveKind Kind;
  /// Value reported by hasCancel().
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
/// This variant describes a region of kind TaskOutlinedRegion and forwards
/// untied-task switching to an UntiedTaskActionTy supplied by the creator.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
  /// Pre/post action that emits the switching points of an untied task.
  /// All emission is skipped when the action was constructed with
  /// Tied == true.
145   class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (negation of the constructor's Tied).
146     bool Untied;
    /// Pointer-typed variable through which the part id is loaded and stored.
147     const VarDecl *PartIDVar;
    /// Code generation sequence run at every switching point.
148     const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created in Enter(), null until then.
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// For an untied task: loads the part id, emits a switch on it whose
    /// default destination branches through cleanups to the return block,
    /// registers a fresh block as case 0 and then emits the first switching
    /// point.
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        // Default destination of the switch: leave through the return block.
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 continues in a new block placed right after the switch.
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
    /// Emits one switching point for an untied task: stores the current
    /// number of switch cases as the part id, runs UntiedCodeGen, branches
    /// through cleanups to the return block and registers a new switch case
    /// whose block branches to the code that follows the switching point.
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
        // The stored value equals the case number added below, because no
        // case is added between this store and that addCase() call.
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
    /// Returns the number of cases registered on the switch.
    /// NOTE(review): dereferences UntiedSwitch, which stays null unless
    /// Enter() ran for an untied task - callers presumably guard on that.
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
  /// \param CS Captured statement of the region.
  /// \param ThreadIDVar Thread-id variable; must not be null.
  /// \param CodeGen Code generation sequence for the region body.
  /// \param Kind Directive kind of the region.
  /// \param HasCancel Value reported by hasCancel().
  /// \param Action Untied-task action; stored by reference, so it has to
  /// outlive this object.
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
  /// Emits a switching point by delegating to the untied-task action.
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is TaskOutlinedRegion.
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder region body generator that must never be invoked; reaching it
/// is reported through llvm_unreachable.
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
443 /// Values for bit flags used in the ident_t to describe the fields.
444 /// All enumerated elements are named and described in accordance with the code
445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
446 enum OpenMPLocationFlags : unsigned {
447   /// Use trampoline for internal microtask.
448   OMP_IDENT_IMD = 0x01,
449   /// Use c-style ident structure.
450   OMP_IDENT_KMPC = 0x02,
451   /// Atomic reduction option for kmpc_reduce.
452   OMP_ATOMIC_REDUCE = 0x10,
453   /// Explicit 'barrier' directive.
454   OMP_IDENT_BARRIER_EXPL = 0x20,
455   /// Implicit barrier in code.
456   OMP_IDENT_BARRIER_IMPL = 0x40,
457   /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
458   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459   /// Implicit barrier in 'sections' directive.
460   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461   /// Implicit barrier in 'single' directive.
462   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463   /// Call of __kmp_for_static_init for static loop.
464   OMP_IDENT_WORK_LOOP = 0x200,
465   /// Call of __kmp_for_static_init for sections.
466   OMP_IDENT_WORK_SECTIONS = 0x400,
467   /// Call of __kmp_for_static_init for distribute.
468   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Marks the enum as a bitmask enum; the largest enumerator is passed in.
469   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470 };
471 
// Nested anonymous namespace; the bitmask-enum macro below is declared
// inside it.
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474 /// Values for bit flags for marking which requires clauses have been used.
475 enum OpenMPOffloadingRequiresDirFlags : int64_t {
476   /// flag undefined.
477   OMP_REQ_UNDEFINED               = 0x000,
478   /// no requires clause present.
479   OMP_REQ_NONE                    = 0x001,
480   /// reverse_offload clause.
481   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482   /// unified_address clause.
483   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484   /// unified_shared_memory clause.
485   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486   /// dynamic_allocators clause.
487   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Marks the enum as a bitmask enum; the largest enumerator is passed in.
488   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489 };
490 
/// Reserved device id values.
491 enum OpenMPOffloadingReservedDeviceIDs {
492   /// Device ID if the device was not defined, runtime should get it
493   /// from environment variables in the spec.
494   OMP_DEVICEID_UNDEF = -1,
495 };
496 } // anonymous namespace
497 
498 /// Describes ident structure that describes a source location.
499 /// All descriptions are taken from
500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
501 /// Original structure:
502 /// typedef struct ident {
503 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
504 ///                                  see above  */
505 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
506 ///                                  KMP_IDENT_KMPC identifies this union
507 ///                                  member  */
508 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
509 ///                                  see above */
510 ///#if USE_ITT_BUILD
511 ///                            /*  but currently used for storing
512 ///                                region-specific ITT */
513 ///                            /*  contextual information. */
514 ///#endif /* USE_ITT_BUILD */
515 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
516 ///                                 C++  */
517 ///    char const *psource;    /**< String describing the source location.
518 ///                            The string is composed of semi-colon separated
519 ///                            fields which describe the source file,
520 ///                            the function and a pair of line numbers that
521 ///                            delimit the construct.
522 ///                             */
523 /// } ident_t;
///
/// The enumerators below are listed in the same order as the members of the
/// structure above, so each value is the index of the corresponding member.
524 enum IdentFieldIndex {
525   /// might be used in Fortran
526   IdentField_Reserved_1,
527   /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
528   IdentField_Flags,
529   /// Not really used in Fortran any more
530   IdentField_Reserved_2,
531   /// Source[4] in Fortran, do not use for C++
532   IdentField_Reserved_3,
533   /// String describing the source location. The string is composed of
534   /// semi-colon separated fields which describe the source file, the function
535   /// and a pair of line numbers that delimit the construct.
536   IdentField_PSource
537 };
538 
539 /// Schedule types for 'omp for' loops (these enumerators are taken from
540 /// the enum sched_type in kmp.h).
/// Each OMP_ord_* value equals the matching OMP_sch_* value plus 32.
541 enum OpenMPSchedType {
542   /// Lower bound for default (unordered) versions.
543   OMP_sch_lower = 32,
544   OMP_sch_static_chunked = 33,
545   OMP_sch_static = 34,
546   OMP_sch_dynamic_chunked = 35,
547   OMP_sch_guided_chunked = 36,
548   OMP_sch_runtime = 37,
549   OMP_sch_auto = 38,
550   /// static with chunk adjustment (e.g., simd)
551   OMP_sch_static_balanced_chunked = 45,
552   /// Lower bound for 'ordered' versions.
553   OMP_ord_lower = 64,
554   OMP_ord_static_chunked = 65,
555   OMP_ord_static = 66,
556   OMP_ord_dynamic_chunked = 67,
557   OMP_ord_guided_chunked = 68,
558   OMP_ord_runtime = 69,
559   OMP_ord_auto = 70,
  /// Default schedule; an alias of OMP_sch_static.
560   OMP_sch_default = OMP_sch_static,
561   /// dist_schedule types
562   OMP_dist_sch_static_chunked = 91,
563   OMP_dist_sch_static = 92,
564   /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
565   /// Set if the monotonic schedule modifier was present.
566   OMP_sch_modifier_monotonic = (1 << 29),
567   /// Set if the nonmonotonic schedule modifier was present.
568   OMP_sch_modifier_nonmonotonic = (1 << 30),
569 };
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
/// Emit the initialization of a private reduction copy for a user-defined
/// reduction.
/// \param DRD Declare-reduction declaration; must not be null.
/// \param InitOp Initializer call expression; only used when \p DRD has an
/// initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original item.
/// \param Ty Type of the item; only used when \p DRD has no initializer.
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
    // InitOp is expected to be a call through an opaque callee whose two
    // arguments are unary operators applied to variable references.
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the first argument's variable to the private copy and the
    // second argument's variable to the original item.
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second function of the pair and emit the
    // call, discarding its result.
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
    // No initializer: store the null constant of Ty into the private copy,
    // going through a private constant global that holds that constant.
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
    // Read the global in the form that matches the evaluation kind of Ty.
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
    // Wrap the value in an opaque expression so it can be emitted to memory.
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
663 /// Emit initialization of arrays of complex types.
/// The array is initialized element by element in an emitted loop that is
/// skipped entirely when the array has no elements.
/// \param CGF Function the code is emitted into.
664 /// \param DestAddr Address of the array.
665 /// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted into it.
666 /// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. The source array is
/// only walked when it is non-null.
667 /// \param SrcAddr Address of the original array.
668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669                                  QualType Type, bool EmitDeclareReductionInit,
670                                  const Expr *Init,
671                                  const OMPDeclareReductionDecl *DRD,
672                                  Address SrcAddr = Address::invalid()) {
673   // Perform element-by-element initialization.
674   QualType ElementTy;
675 
676   // Drill down to the base element type on both arrays.
677   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679   DestAddr =
680       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681   if (DRD)
682     SrcAddr =
683         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684 
685   llvm::Value *SrcBegin = nullptr;
686   if (DRD)
687     SrcBegin = SrcAddr.getPointer();
688   llvm::Value *DestBegin = DestAddr.getPointer();
689   // Cast from pointer to array type to pointer to single element.
690   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691   // The basic structure here is a while-do loop.
692   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for an empty array.
694   llvm::Value *IsEmpty =
695       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697 
698   // Enter the loop body, making that address the current address.
699   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700   CGF.EmitBlock(BodyBB);
701 
702   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703 
  // PHI nodes track the current source (only with DRD) and destination
  // elements; their back-edge values are added at the end of the body.
704   llvm::PHINode *SrcElementPHI = nullptr;
705   Address SrcElementCurrent = Address::invalid();
706   if (DRD) {
707     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708                                           "omp.arraycpy.srcElementPast");
709     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710     SrcElementCurrent =
711         Address(SrcElementPHI,
712                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713   }
714   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716   DestElementPHI->addIncoming(DestBegin, EntryBB);
717   Address DestElementCurrent =
718       Address(DestElementPHI,
719               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720 
721   // Emit copy.
722   {
    // Cleanups created while initializing one element run before the
    // pointers are advanced.
723     CodeGenFunction::RunCleanupsScope InitScope(CGF);
724     if (EmitDeclareReductionInit) {
725       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726                                        SrcElementCurrent, ElementTy);
727     } else
728       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729                            /*IsInitializer=*/false);
730   }
731 
732   if (DRD) {
733     // Shift the address forward by one element.
    // NOTE(review): this is the source pointer, yet the value is named
    // "omp.arraycpy.dest.element" - looks like a copy/paste slip; the name
    // appears in emitted IR, so confirm before renaming.
734     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737   }
738 
739   // Shift the address forward by one element.
740   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742   // Check whether we've reached the end.
743   llvm::Value *Done =
744       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747 
748   // Done.
749   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750 }
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
870 void ReductionCodeGen::emitInitialization(
871     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
872     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
873   assert(SharedAddresses.size() > N && "No variable was generated");
874   const auto *PrivateVD =
875       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
876   const OMPDeclareReductionDecl *DRD =
877       getReductionInit(ClausesData[N].ReductionOp);
878   QualType PrivateType = PrivateVD->getType();
879   PrivateAddr = CGF.Builder.CreateElementBitCast(
880       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
881   QualType SharedType = SharedAddresses[N].first.getType();
882   SharedLVal = CGF.MakeAddrLValue(
883       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
884                                        CGF.ConvertTypeForMem(SharedType)),
885       SharedType, SharedAddresses[N].first.getBaseInfo(),
886       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
887   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
888     if (DRD && DRD->getInitializer())
889       (void)DefaultInit(CGF);
890     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
891   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
892     (void)DefaultInit(CGF);
893     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
894                                      PrivateAddr, SharedLVal.getAddress(CGF),
895                                      SharedLVal.getType());
896   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
897              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
898     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
899                          PrivateVD->getType().getQualifiers(),
900                          /*IsInitializer=*/false);
901   }
902 }
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
946                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
947                           llvm::Value *Addr) {
948   Address Tmp = Address::invalid();
949   Address TopTmp = Address::invalid();
950   Address MostTopTmp = Address::invalid();
951   BaseTy = BaseTy.getNonReferenceType();
952   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
953          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
954     Tmp = CGF.CreateMemTemp(BaseTy);
955     if (TopTmp.isValid())
956       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
957     else
958       MostTopTmp = Tmp;
959     TopTmp = Tmp;
960     BaseTy = BaseTy->getPointeeType();
961   }
962   llvm::Type *Ty = BaseLVType;
963   if (Tmp.isValid())
964     Ty = Tmp.getElementType();
965   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
966   if (Tmp.isValid()) {
967     CGF.Builder.CreateStore(Addr, Tmp);
968     return MostTopTmp;
969   }
970   return Address(Addr, BaseLVAlignment);
971 }
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1063                                  StringRef Separator)
1064     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1065       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1066   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1067 
1068   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1069   OMPBuilder.initialize();
1070   loadOffloadInfoMetadata();
1071 }
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
1099 static llvm::Function *
1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1101                           const Expr *CombinerInitializer, const VarDecl *In,
1102                           const VarDecl *Out, bool IsCombiner) {
1103   // void .omp_combiner.(Ty *in, Ty *out);
1104   ASTContext &C = CGM.getContext();
1105   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1106   FunctionArgList Args;
1107   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1108                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1110                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1111   Args.push_back(&OmpOutParm);
1112   Args.push_back(&OmpInParm);
1113   const CGFunctionInfo &FnInfo =
1114       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1115   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1116   std::string Name = CGM.getOpenMPRuntime().getName(
1117       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1118   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1119                                     Name, &CGM.getModule());
1120   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1121   if (CGM.getLangOpts().Optimize) {
1122     Fn->removeFnAttr(llvm::Attribute::NoInline);
1123     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1124     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1125   }
1126   CodeGenFunction CGF(CGM);
1127   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1128   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1129   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1130                     Out->getLocation());
1131   CodeGenFunction::OMPPrivateScope Scope(CGF);
1132   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1133   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1134     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1135         .getAddress(CGF);
1136   });
1137   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1138   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1139     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1140         .getAddress(CGF);
1141   });
1142   (void)Scope.Privatize();
1143   if (!IsCombiner && Out->hasInit() &&
1144       !CGF.isTrivialInitializer(Out->getInit())) {
1145     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1146                          Out->getType().getQualifiers(),
1147                          /*IsInitializer=*/true);
1148   }
1149   if (CombinerInitializer)
1150     CGF.EmitIgnoredExpr(CombinerInitializer);
1151   Scope.ForceCleanup();
1152   CGF.FinishFunction();
1153   return Fn;
1154 }
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1294     bool Tied, unsigned &NumberOfParts) {
1295   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1296                                               PrePostActionTy &) {
1297     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1298     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1299     llvm::Value *TaskArgs[] = {
1300         UpLoc, ThreadID,
1301         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1302                                     TaskTVar->getType()->castAs<PointerType>())
1303             .getPointer(CGF)};
1304     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1305                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1306                         TaskArgs);
1307   };
1308   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1309                                                             UntiedCodeGen);
1310   CodeGen.setAction(Action);
1311   assert(!ThreadIDVar->getType()->isPointerType() &&
1312          "thread id variable must be of type kmp_int32 for tasks");
1313   const OpenMPDirectiveKind Region =
1314       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1315                                                       : OMPD_task;
1316   const CapturedStmt *CS = D.getCapturedStmt(Region);
1317   bool HasCancel = false;
1318   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1319     HasCancel = TD->hasCancel();
1320   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1321     HasCancel = TD->hasCancel();
1322   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1323     HasCancel = TD->hasCancel();
1324   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1325     HasCancel = TD->hasCancel();
1326 
1327   CodeGenFunction CGF(CGM, true);
1328   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1329                                         InnermostKind, HasCancel, Action);
1330   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1331   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1332   if (!Tied)
1333     NumberOfParts = Action.getNumberOfParts();
1334   return Res;
1335 }
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1408                                                  SourceLocation Loc,
1409                                                  unsigned Flags) {
1410   llvm::Constant *SrcLocStr;
1411   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1412       Loc.isInvalid()) {
1413     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1414   } else {
1415     std::string FunctionName = "";
1416     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1417       FunctionName = FD->getQualifiedNameAsString();
1418     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1419     const char *FileName = PLoc.getFilename();
1420     unsigned Line = PLoc.getLine();
1421     unsigned Column = PLoc.getColumn();
1422     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1423                                                 Line, Column);
1424   }
1425   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1426   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1427                                      Reserved2Flags);
1428 }
1429 
1430 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1431                                           SourceLocation Loc) {
1432   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1433 
1434   llvm::Value *ThreadID = nullptr;
1435   // Check whether we've already cached a load of the thread id in this
1436   // function.
1437   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1438   if (I != OpenMPLocThreadIDMap.end()) {
1439     ThreadID = I->second.ThreadID;
1440     if (ThreadID != nullptr)
1441       return ThreadID;
1442   }
1443   // If exceptions are enabled, do not use parameter to avoid possible crash.
1444   if (auto *OMPRegionInfo =
1445           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1446     if (OMPRegionInfo->getThreadIDVariable()) {
1447       // Check if this an outlined function with thread id passed as argument.
1448       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1449       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1450       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1451           !CGF.getLangOpts().CXXExceptions ||
1452           CGF.Builder.GetInsertBlock() == TopBlock ||
1453           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1454           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1455               TopBlock ||
1456           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1457               CGF.Builder.GetInsertBlock()) {
1458         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1459         // If value loaded in entry block, cache it and use it everywhere in
1460         // function.
1461         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1462           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1463           Elem.second.ThreadID = ThreadID;
1464         }
1465         return ThreadID;
1466       }
1467     }
1468   }
1469 
1470   // This is not an outlined function region - need to call __kmpc_int32
1471   // kmpc_global_thread_num(ident_t *loc).
1472   // Generate thread id value and cache this value for use across the
1473   // function.
1474   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1475   if (!Elem.second.ServiceInsertPt)
1476     setLocThreadIdInsertPt(CGF);
1477   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1478   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1479   llvm::CallInst *Call = CGF.Builder.CreateCall(
1480       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1481                                             OMPRTL___kmpc_global_thread_num),
1482       emitUpdateLocation(CGF, Loc));
1483   Call->setCallingConv(CGF.getRuntimeCC());
1484   Elem.second.ThreadID = Call;
1485   return Call;
1486 }
1487 
1488 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1489   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1490   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1491     clearLocThreadIdInsertPt(CGF);
1492     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1493   }
1494   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1495     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1496       UDRMap.erase(D);
1497     FunctionUDRMap.erase(CGF.CurFn);
1498   }
1499   auto I = FunctionUDMMap.find(CGF.CurFn);
1500   if (I != FunctionUDMMap.end()) {
1501     for(const auto *D : I->second)
1502       UDMMap.erase(D);
1503     FunctionUDMMap.erase(I);
1504   }
1505   LastprivateConditionalToTypes.erase(CGF.CurFn);
1506 }
1507 
1508 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1509   return OMPBuilder.IdentPtr;
1510 }
1511 
1512 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1513   if (!Kmpc_MicroTy) {
1514     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1515     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1516                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1517     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1518   }
1519   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1520 }
1521 
1522 llvm::FunctionCallee
1523 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1524   assert((IVSize == 32 || IVSize == 64) &&
1525          "IV size is not compatible with the omp runtime");
1526   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1527                                             : "__kmpc_for_static_init_4u")
1528                                 : (IVSigned ? "__kmpc_for_static_init_8"
1529                                             : "__kmpc_for_static_init_8u");
1530   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1531   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1532   llvm::Type *TypeParams[] = {
1533     getIdentTyPointerTy(),                     // loc
1534     CGM.Int32Ty,                               // tid
1535     CGM.Int32Ty,                               // schedtype
1536     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1537     PtrTy,                                     // p_lower
1538     PtrTy,                                     // p_upper
1539     PtrTy,                                     // p_stride
1540     ITy,                                       // incr
1541     ITy                                        // chunk
1542   };
1543   auto *FnTy =
1544       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1545   return CGM.CreateRuntimeFunction(FnTy, Name);
1546 }
1547 
1548 llvm::FunctionCallee
1549 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1550   assert((IVSize == 32 || IVSize == 64) &&
1551          "IV size is not compatible with the omp runtime");
1552   StringRef Name =
1553       IVSize == 32
1554           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1555           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1556   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1557   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1558                                CGM.Int32Ty,           // tid
1559                                CGM.Int32Ty,           // schedtype
1560                                ITy,                   // lower
1561                                ITy,                   // upper
1562                                ITy,                   // stride
1563                                ITy                    // chunk
1564   };
1565   auto *FnTy =
1566       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1567   return CGM.CreateRuntimeFunction(FnTy, Name);
1568 }
1569 
1570 llvm::FunctionCallee
1571 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1572   assert((IVSize == 32 || IVSize == 64) &&
1573          "IV size is not compatible with the omp runtime");
1574   StringRef Name =
1575       IVSize == 32
1576           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1577           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1578   llvm::Type *TypeParams[] = {
1579       getIdentTyPointerTy(), // loc
1580       CGM.Int32Ty,           // tid
1581   };
1582   auto *FnTy =
1583       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1584   return CGM.CreateRuntimeFunction(FnTy, Name);
1585 }
1586 
1587 llvm::FunctionCallee
1588 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1589   assert((IVSize == 32 || IVSize == 64) &&
1590          "IV size is not compatible with the omp runtime");
1591   StringRef Name =
1592       IVSize == 32
1593           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1594           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1595   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1596   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1597   llvm::Type *TypeParams[] = {
1598     getIdentTyPointerTy(),                     // loc
1599     CGM.Int32Ty,                               // tid
1600     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1601     PtrTy,                                     // p_lower
1602     PtrTy,                                     // p_upper
1603     PtrTy                                      // p_stride
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 /// Obtain information that uniquely identifies a target entry. This
1611 /// consists of the file and device IDs as well as line number associated with
1612 /// the relevant entry source location.
1613 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1614                                      unsigned &DeviceID, unsigned &FileID,
1615                                      unsigned &LineNum) {
1616   SourceManager &SM = C.getSourceManager();
1617 
1618   // The loc should be always valid and have a file ID (the user cannot use
1619   // #pragma directives in macros)
1620 
1621   assert(Loc.isValid() && "Source location is expected to be always valid.");
1622 
1623   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1624   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1625 
1626   llvm::sys::fs::UniqueID ID;
1627   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1628     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1629         << PLoc.getFilename() << EC.message();
1630 
1631   DeviceID = ID.getDevice();
1632   FileID = ID.getFile();
1633   LineNum = PLoc.getLine();
1634 }
1635 
1636 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1637   if (CGM.getLangOpts().OpenMPSimd)
1638     return Address::invalid();
1639   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1640       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1641   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1642               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1643                HasRequiresUnifiedSharedMemory))) {
1644     SmallString<64> PtrName;
1645     {
1646       llvm::raw_svector_ostream OS(PtrName);
1647       OS << CGM.getMangledName(GlobalDecl(VD));
1648       if (!VD->isExternallyVisible()) {
1649         unsigned DeviceID, FileID, Line;
1650         getTargetEntryUniqueInfo(CGM.getContext(),
1651                                  VD->getCanonicalDecl()->getBeginLoc(),
1652                                  DeviceID, FileID, Line);
1653         OS << llvm::format("_%x", FileID);
1654       }
1655       OS << "_decl_tgt_ref_ptr";
1656     }
1657     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1658     if (!Ptr) {
1659       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1660       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1661                                         PtrName);
1662 
1663       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1664       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1665 
1666       if (!CGM.getLangOpts().OpenMPIsDevice)
1667         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1668       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1669     }
1670     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1671   }
1672   return Address::invalid();
1673 }
1674 
1675 llvm::Constant *
1676 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1677   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1678          !CGM.getContext().getTargetInfo().isTLSSupported());
1679   // Lookup the entry, lazily creating it if necessary.
1680   std::string Suffix = getName({"cache", ""});
1681   return getOrCreateInternalVariable(
1682       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1683 }
1684 
1685 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1686                                                 const VarDecl *VD,
1687                                                 Address VDAddr,
1688                                                 SourceLocation Loc) {
1689   if (CGM.getLangOpts().OpenMPUseTLS &&
1690       CGM.getContext().getTargetInfo().isTLSSupported())
1691     return VDAddr;
1692 
1693   llvm::Type *VarTy = VDAddr.getElementType();
1694   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1695                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1696                                                        CGM.Int8PtrTy),
1697                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1698                          getOrCreateThreadPrivateCache(VD)};
1699   return Address(CGF.EmitRuntimeCall(
1700                      OMPBuilder.getOrCreateRuntimeFunction(
1701                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1702                      Args),
1703                  VDAddr.getAlignment());
1704 }
1705 
1706 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1707     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1708     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1709   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1710   // library.
1711   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1712   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1713                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1714                       OMPLoc);
1715   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1716   // to register constructor/destructor for variable.
1717   llvm::Value *Args[] = {
1718       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1719       Ctor, CopyCtor, Dtor};
1720   CGF.EmitRuntimeCall(
1721       OMPBuilder.getOrCreateRuntimeFunction(
1722           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1723       Args);
1724 }
1725 
1726 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1727     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1728     bool PerformInit, CodeGenFunction *CGF) {
1729   if (CGM.getLangOpts().OpenMPUseTLS &&
1730       CGM.getContext().getTargetInfo().isTLSSupported())
1731     return nullptr;
1732 
1733   VD = VD->getDefinition(CGM.getContext());
1734   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1735     QualType ASTTy = VD->getType();
1736 
1737     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1738     const Expr *Init = VD->getAnyInitializer();
1739     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1740       // Generate function that re-emits the declaration's initializer into the
1741       // threadprivate copy of the variable VD
1742       CodeGenFunction CtorCGF(CGM);
1743       FunctionArgList Args;
1744       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1745                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1746                             ImplicitParamDecl::Other);
1747       Args.push_back(&Dst);
1748 
1749       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1750           CGM.getContext().VoidPtrTy, Args);
1751       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1752       std::string Name = getName({"__kmpc_global_ctor_", ""});
1753       llvm::Function *Fn =
1754           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1755       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1756                             Args, Loc, Loc);
1757       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1758           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1759           CGM.getContext().VoidPtrTy, Dst.getLocation());
1760       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1761       Arg = CtorCGF.Builder.CreateElementBitCast(
1762           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1763       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1764                                /*IsInitializer=*/true);
1765       ArgVal = CtorCGF.EmitLoadOfScalar(
1766           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1767           CGM.getContext().VoidPtrTy, Dst.getLocation());
1768       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1769       CtorCGF.FinishFunction();
1770       Ctor = Fn;
1771     }
1772     if (VD->getType().isDestructedType() != QualType::DK_none) {
1773       // Generate function that emits destructor call for the threadprivate copy
1774       // of the variable VD
1775       CodeGenFunction DtorCGF(CGM);
1776       FunctionArgList Args;
1777       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1778                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1779                             ImplicitParamDecl::Other);
1780       Args.push_back(&Dst);
1781 
1782       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1783           CGM.getContext().VoidTy, Args);
1784       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1785       std::string Name = getName({"__kmpc_global_dtor_", ""});
1786       llvm::Function *Fn =
1787           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1788       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1789       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1790                             Loc, Loc);
1791       // Create a scope with an artificial location for the body of this function.
1792       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1793       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1794           DtorCGF.GetAddrOfLocalVar(&Dst),
1795           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1796       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1797                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1798                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1799       DtorCGF.FinishFunction();
1800       Dtor = Fn;
1801     }
1802     // Do not emit init function if it is not required.
1803     if (!Ctor && !Dtor)
1804       return nullptr;
1805 
1806     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1807     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1808                                                /*isVarArg=*/false)
1809                            ->getPointerTo();
1810     // Copying constructor for the threadprivate variable.
1811     // Must be NULL - reserved by runtime, but currently it requires that this
1812     // parameter is always NULL. Otherwise it fires assertion.
1813     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1814     if (Ctor == nullptr) {
1815       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1816                                              /*isVarArg=*/false)
1817                          ->getPointerTo();
1818       Ctor = llvm::Constant::getNullValue(CtorTy);
1819     }
1820     if (Dtor == nullptr) {
1821       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1822                                              /*isVarArg=*/false)
1823                          ->getPointerTo();
1824       Dtor = llvm::Constant::getNullValue(DtorTy);
1825     }
1826     if (!CGF) {
1827       auto *InitFunctionTy =
1828           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1829       std::string Name = getName({"__omp_threadprivate_init_", ""});
1830       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1831           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1832       CodeGenFunction InitCGF(CGM);
1833       FunctionArgList ArgList;
1834       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1835                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1836                             Loc, Loc);
1837       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1838       InitCGF.FinishFunction();
1839       return InitFunction;
1840     }
1841     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1842   }
1843   return nullptr;
1844 }
1845 
1846 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1847                                                      llvm::GlobalVariable *Addr,
1848                                                      bool PerformInit) {
1849   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1850       !CGM.getLangOpts().OpenMPIsDevice)
1851     return false;
1852   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1853       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1854   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1855       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1856        HasRequiresUnifiedSharedMemory))
1857     return CGM.getLangOpts().OpenMPIsDevice;
1858   VD = VD->getDefinition(CGM.getContext());
1859   assert(VD && "Unknown VarDecl");
1860 
1861   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1862     return CGM.getLangOpts().OpenMPIsDevice;
1863 
1864   QualType ASTTy = VD->getType();
1865   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1866 
1867   // Produce the unique prefix to identify the new target regions. We use
1868   // the source location of the variable declaration which we know to not
1869   // conflict with any target region.
1870   unsigned DeviceID;
1871   unsigned FileID;
1872   unsigned Line;
1873   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1874   SmallString<128> Buffer, Out;
1875   {
1876     llvm::raw_svector_ostream OS(Buffer);
1877     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1878        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1879   }
1880 
1881   const Expr *Init = VD->getAnyInitializer();
1882   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1883     llvm::Constant *Ctor;
1884     llvm::Constant *ID;
1885     if (CGM.getLangOpts().OpenMPIsDevice) {
1886       // Generate function that re-emits the declaration's initializer into
1887       // the threadprivate copy of the variable VD
1888       CodeGenFunction CtorCGF(CGM);
1889 
1890       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1891       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1892       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1893           FTy, Twine(Buffer, "_ctor"), FI, Loc);
1894       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1895       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1896                             FunctionArgList(), Loc, Loc);
1897       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1898       CtorCGF.EmitAnyExprToMem(Init,
1899                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
1900                                Init->getType().getQualifiers(),
1901                                /*IsInitializer=*/true);
1902       CtorCGF.FinishFunction();
1903       Ctor = Fn;
1904       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1905       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1906     } else {
1907       Ctor = new llvm::GlobalVariable(
1908           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1909           llvm::GlobalValue::PrivateLinkage,
1910           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1911       ID = Ctor;
1912     }
1913 
1914     // Register the information for the entry associated with the constructor.
1915     Out.clear();
1916     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1917         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1918         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1919   }
1920   if (VD->getType().isDestructedType() != QualType::DK_none) {
1921     llvm::Constant *Dtor;
1922     llvm::Constant *ID;
1923     if (CGM.getLangOpts().OpenMPIsDevice) {
1924       // Generate function that emits destructor call for the threadprivate
1925       // copy of the variable VD
1926       CodeGenFunction DtorCGF(CGM);
1927 
1928       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1929       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1930       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1931           FTy, Twine(Buffer, "_dtor"), FI, Loc);
1932       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1933       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1934                             FunctionArgList(), Loc, Loc);
1935       // Create a scope with an artificial location for the body of this
1936       // function.
1937       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1938       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
1939                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1940                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1941       DtorCGF.FinishFunction();
1942       Dtor = Fn;
1943       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1944       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
1945     } else {
1946       Dtor = new llvm::GlobalVariable(
1947           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1948           llvm::GlobalValue::PrivateLinkage,
1949           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1950       ID = Dtor;
1951     }
1952     // Register the information for the entry associated with the destructor.
1953     Out.clear();
1954     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1955         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1956         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1957   }
1958   return CGM.getLangOpts().OpenMPIsDevice;
1959 }
1960 
1961 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1962                                                           QualType VarType,
1963                                                           StringRef Name) {
1964   std::string Suffix = getName({"artificial", ""});
1965   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1966   llvm::Value *GAddr =
1967       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1968   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1969       CGM.getTarget().isTLSSupported()) {
1970     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1971     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1972   }
1973   std::string CacheSuffix = getName({"cache", ""});
1974   llvm::Value *Args[] = {
1975       emitUpdateLocation(CGF, SourceLocation()),
1976       getThreadID(CGF, SourceLocation()),
1977       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1978       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1979                                 /*isSigned=*/false),
1980       getOrCreateInternalVariable(
1981           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
1982   return Address(
1983       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1984           CGF.EmitRuntimeCall(
1985               OMPBuilder.getOrCreateRuntimeFunction(
1986                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1987               Args),
1988           VarLVType->getPointerTo(/*AddrSpace=*/0)),
1989       CGM.getContext().getTypeAlignInChars(VarType));
1990 }
1991 
1992 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1993                                    const RegionCodeGenTy &ThenGen,
1994                                    const RegionCodeGenTy &ElseGen) {
1995   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1996 
1997   // If the condition constant folds and can be elided, try to avoid emitting
1998   // the condition and the dead arm of the if/else.
1999   bool CondConstant;
2000   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2001     if (CondConstant)
2002       ThenGen(CGF);
2003     else
2004       ElseGen(CGF);
2005     return;
2006   }
2007 
2008   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2009   // emit the conditional branch.
2010   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2011   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2012   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2013   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2014 
2015   // Emit the 'then' code.
2016   CGF.EmitBlock(ThenBlock);
2017   ThenGen(CGF);
2018   CGF.EmitBranch(ContBlock);
2019   // Emit the 'else' code if present.
2020   // There is no need to emit line number for unconditional branch.
2021   (void)ApplyDebugLocation::CreateEmpty(CGF);
2022   CGF.EmitBlock(ElseBlock);
2023   ElseGen(CGF);
2024   // There is no need to emit line number for unconditional branch.
2025   (void)ApplyDebugLocation::CreateEmpty(CGF);
2026   CGF.EmitBranch(ContBlock);
2027   // Emit the continuation block for code after the if.
2028   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2029 }
2030 
2031 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2032                                        llvm::Function *OutlinedFn,
2033                                        ArrayRef<llvm::Value *> CapturedVars,
2034                                        const Expr *IfCond) {
2035   if (!CGF.HaveInsertPoint())
2036     return;
2037   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2038   auto &M = CGM.getModule();
2039   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2040                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2041     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2042     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2043     llvm::Value *Args[] = {
2044         RTLoc,
2045         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2046         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2047     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2048     RealArgs.append(std::begin(Args), std::end(Args));
2049     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2050 
2051     llvm::FunctionCallee RTLFn =
2052         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2053     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2054   };
2055   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2056                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2057     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2058     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2059     // Build calls:
2060     // __kmpc_serialized_parallel(&Loc, GTid);
2061     llvm::Value *Args[] = {RTLoc, ThreadID};
2062     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2063                             M, OMPRTL___kmpc_serialized_parallel),
2064                         Args);
2065 
2066     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2067     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2068     Address ZeroAddrBound =
2069         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2070                                          /*Name=*/".bound.zero.addr");
2071     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2072     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2073     // ThreadId for serialized parallels is 0.
2074     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2075     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2076     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2077     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2078 
2079     // __kmpc_end_serialized_parallel(&Loc, GTid);
2080     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2081     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2082                             M, OMPRTL___kmpc_end_serialized_parallel),
2083                         EndArgs);
2084   };
2085   if (IfCond) {
2086     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2087   } else {
2088     RegionCodeGenTy ThenRCG(ThenGen);
2089     ThenRCG(CGF);
2090   }
2091 }
2092 
2093 // If we're inside an (outlined) parallel region, use the region info's
2094 // thread-ID variable (it is passed in a first argument of the outlined function
2095 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2096 // regular serial code region, get thread ID by calling kmp_int32
2097 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2098 // return the address of that temp.
2099 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2100                                              SourceLocation Loc) {
2101   if (auto *OMPRegionInfo =
2102           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2103     if (OMPRegionInfo->getThreadIDVariable())
2104       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2105 
2106   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2107   QualType Int32Ty =
2108       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2109   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2110   CGF.EmitStoreOfScalar(ThreadID,
2111                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2112 
2113   return ThreadIDTemp;
2114 }
2115 
2116 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2117     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2118   SmallString<256> Buffer;
2119   llvm::raw_svector_ostream Out(Buffer);
2120   Out << Name;
2121   StringRef RuntimeName = Out.str();
2122   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2123   if (Elem.second) {
2124     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2125            "OMP internal variable has different type than requested");
2126     return &*Elem.second;
2127   }
2128 
2129   return Elem.second = new llvm::GlobalVariable(
2130              CGM.getModule(), Ty, /*IsConstant*/ false,
2131              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2132              Elem.first(), /*InsertBefore=*/nullptr,
2133              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2134 }
2135 
2136 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2137   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2138   std::string Name = getName({Prefix, "var"});
2139   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2140 }
2141 
2142 namespace {
2143 /// Common pre(post)-action for different OpenMP constructs.
2144 class CommonActionTy final : public PrePostActionTy {
2145   llvm::FunctionCallee EnterCallee;
2146   ArrayRef<llvm::Value *> EnterArgs;
2147   llvm::FunctionCallee ExitCallee;
2148   ArrayRef<llvm::Value *> ExitArgs;
2149   bool Conditional;
2150   llvm::BasicBlock *ContBlock = nullptr;
2151 
2152 public:
2153   CommonActionTy(llvm::FunctionCallee EnterCallee,
2154                  ArrayRef<llvm::Value *> EnterArgs,
2155                  llvm::FunctionCallee ExitCallee,
2156                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2157       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2158         ExitArgs(ExitArgs), Conditional(Conditional) {}
2159   void Enter(CodeGenFunction &CGF) override {
2160     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2161     if (Conditional) {
2162       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2163       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2164       ContBlock = CGF.createBasicBlock("omp_if.end");
2165       // Generate the branch (If-stmt)
2166       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2167       CGF.EmitBlock(ThenBlock);
2168     }
2169   }
2170   void Done(CodeGenFunction &CGF) {
2171     // Emit the rest of blocks/branches
2172     CGF.EmitBranch(ContBlock);
2173     CGF.EmitBlock(ContBlock, true);
2174   }
2175   void Exit(CodeGenFunction &CGF) override {
2176     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2177   }
2178 };
2179 } // anonymous namespace
2180 
2181 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2182                                          StringRef CriticalName,
2183                                          const RegionCodeGenTy &CriticalOpGen,
2184                                          SourceLocation Loc, const Expr *Hint) {
2185   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2186   // CriticalOpGen();
2187   // __kmpc_end_critical(ident_t *, gtid, Lock);
2188   // Prepare arguments and build a call to __kmpc_critical
2189   if (!CGF.HaveInsertPoint())
2190     return;
2191   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2192                          getCriticalRegionLock(CriticalName)};
2193   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2194                                                 std::end(Args));
2195   if (Hint) {
2196     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2197         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2198   }
2199   CommonActionTy Action(
2200       OMPBuilder.getOrCreateRuntimeFunction(
2201           CGM.getModule(),
2202           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2203       EnterArgs,
2204       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2205                                             OMPRTL___kmpc_end_critical),
2206       Args);
2207   CriticalOpGen.setAction(Action);
2208   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2209 }
2210 
2211 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2212                                        const RegionCodeGenTy &MasterOpGen,
2213                                        SourceLocation Loc) {
2214   if (!CGF.HaveInsertPoint())
2215     return;
2216   // if(__kmpc_master(ident_t *, gtid)) {
2217   //   MasterOpGen();
2218   //   __kmpc_end_master(ident_t *, gtid);
2219   // }
2220   // Prepare arguments and build a call to __kmpc_master
2221   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2222   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2223                             CGM.getModule(), OMPRTL___kmpc_master),
2224                         Args,
2225                         OMPBuilder.getOrCreateRuntimeFunction(
2226                             CGM.getModule(), OMPRTL___kmpc_end_master),
2227                         Args,
2228                         /*Conditional=*/true);
2229   MasterOpGen.setAction(Action);
2230   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2231   Action.Done(CGF);
2232 }
2233 
2234 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2235                                         SourceLocation Loc) {
2236   if (!CGF.HaveInsertPoint())
2237     return;
2238   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2239     OMPBuilder.CreateTaskyield(CGF.Builder);
2240   } else {
2241     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2242     llvm::Value *Args[] = {
2243         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2244         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2245     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2246                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2247                         Args);
2248   }
2249 
2250   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2251     Region->emitUntiedSwitch(CGF);
2252 }
2253 
2254 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2255                                           const RegionCodeGenTy &TaskgroupOpGen,
2256                                           SourceLocation Loc) {
2257   if (!CGF.HaveInsertPoint())
2258     return;
2259   // __kmpc_taskgroup(ident_t *, gtid);
2260   // TaskgroupOpGen();
2261   // __kmpc_end_taskgroup(ident_t *, gtid);
2262   // Prepare arguments and build a call to __kmpc_taskgroup
2263   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2264   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2265                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2266                         Args,
2267                         OMPBuilder.getOrCreateRuntimeFunction(
2268                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2269                         Args);
2270   TaskgroupOpGen.setAction(Action);
2271   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2272 }
2273 
2274 /// Given an array of pointers to variables, project the address of a
2275 /// given variable.
2276 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2277                                       unsigned Index, const VarDecl *Var) {
2278   // Pull out the pointer to the variable.
2279   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2280   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2281 
2282   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2283   Addr = CGF.Builder.CreateElementBitCast(
2284       Addr, CGF.ConvertTypeForMem(Var->getType()));
2285   return Addr;
2286 }
2287 
2288 static llvm::Value *emitCopyprivateCopyFunction(
2289     CodeGenModule &CGM, llvm::Type *ArgsType,
2290     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2291     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2292     SourceLocation Loc) {
2293   ASTContext &C = CGM.getContext();
2294   // void copy_func(void *LHSArg, void *RHSArg);
2295   FunctionArgList Args;
2296   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2297                            ImplicitParamDecl::Other);
2298   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2299                            ImplicitParamDecl::Other);
2300   Args.push_back(&LHSArg);
2301   Args.push_back(&RHSArg);
2302   const auto &CGFI =
2303       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2304   std::string Name =
2305       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2306   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2307                                     llvm::GlobalValue::InternalLinkage, Name,
2308                                     &CGM.getModule());
2309   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2310   Fn->setDoesNotRecurse();
2311   CodeGenFunction CGF(CGM);
2312   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2313   // Dest = (void*[n])(LHSArg);
2314   // Src = (void*[n])(RHSArg);
2315   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2316       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2317       ArgsType), CGF.getPointerAlign());
2318   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2319       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2320       ArgsType), CGF.getPointerAlign());
2321   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2322   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2323   // ...
2324   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2325   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2326     const auto *DestVar =
2327         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2328     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2329 
2330     const auto *SrcVar =
2331         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2332     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2333 
2334     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2335     QualType Type = VD->getType();
2336     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2337   }
2338   CGF.FinishFunction();
2339   return Fn;
2340 }
2341 
2342 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2343                                        const RegionCodeGenTy &SingleOpGen,
2344                                        SourceLocation Loc,
2345                                        ArrayRef<const Expr *> CopyprivateVars,
2346                                        ArrayRef<const Expr *> SrcExprs,
2347                                        ArrayRef<const Expr *> DstExprs,
2348                                        ArrayRef<const Expr *> AssignmentOps) {
2349   if (!CGF.HaveInsertPoint())
2350     return;
2351   assert(CopyprivateVars.size() == SrcExprs.size() &&
2352          CopyprivateVars.size() == DstExprs.size() &&
2353          CopyprivateVars.size() == AssignmentOps.size());
2354   ASTContext &C = CGM.getContext();
2355   // int32 did_it = 0;
2356   // if(__kmpc_single(ident_t *, gtid)) {
2357   //   SingleOpGen();
2358   //   __kmpc_end_single(ident_t *, gtid);
2359   //   did_it = 1;
2360   // }
2361   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2362   // <copy_func>, did_it);
2363 
2364   Address DidIt = Address::invalid();
2365   if (!CopyprivateVars.empty()) {
2366     // int32 did_it = 0;
2367     QualType KmpInt32Ty =
2368         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2369     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2370     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2371   }
2372   // Prepare arguments and build a call to __kmpc_single
2373   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2374   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2375                             CGM.getModule(), OMPRTL___kmpc_single),
2376                         Args,
2377                         OMPBuilder.getOrCreateRuntimeFunction(
2378                             CGM.getModule(), OMPRTL___kmpc_end_single),
2379                         Args,
2380                         /*Conditional=*/true);
2381   SingleOpGen.setAction(Action);
2382   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2383   if (DidIt.isValid()) {
2384     // did_it = 1;
2385     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2386   }
2387   Action.Done(CGF);
2388   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2389   // <copy_func>, did_it);
2390   if (DidIt.isValid()) {
2391     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2392     QualType CopyprivateArrayTy = C.getConstantArrayType(
2393         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2394         /*IndexTypeQuals=*/0);
2395     // Create a list of all private variables for copyprivate.
2396     Address CopyprivateList =
2397         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2398     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2399       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2400       CGF.Builder.CreateStore(
2401           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2402               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2403               CGF.VoidPtrTy),
2404           Elem);
2405     }
2406     // Build function that copies private values from single region to all other
2407     // threads in the corresponding parallel region.
2408     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2409         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2410         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2411     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2412     Address CL =
2413       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2414                                                       CGF.VoidPtrTy);
2415     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2416     llvm::Value *Args[] = {
2417         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2418         getThreadID(CGF, Loc),        // i32 <gtid>
2419         BufSize,                      // size_t <buf_size>
2420         CL.getPointer(),              // void *<copyprivate list>
2421         CpyFn,                        // void (*) (void *, void *) <copy_func>
2422         DidItVal                      // i32 did_it
2423     };
2424     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2425                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2426                         Args);
2427   }
2428 }
2429 
2430 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2431                                         const RegionCodeGenTy &OrderedOpGen,
2432                                         SourceLocation Loc, bool IsThreads) {
2433   if (!CGF.HaveInsertPoint())
2434     return;
2435   // __kmpc_ordered(ident_t *, gtid);
2436   // OrderedOpGen();
2437   // __kmpc_end_ordered(ident_t *, gtid);
2438   // Prepare arguments and build a call to __kmpc_ordered
2439   if (IsThreads) {
2440     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2441     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2442                               CGM.getModule(), OMPRTL___kmpc_ordered),
2443                           Args,
2444                           OMPBuilder.getOrCreateRuntimeFunction(
2445                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2446                           Args);
2447     OrderedOpGen.setAction(Action);
2448     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2449     return;
2450   }
2451   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2452 }
2453 
2454 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2455   unsigned Flags;
2456   if (Kind == OMPD_for)
2457     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2458   else if (Kind == OMPD_sections)
2459     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2460   else if (Kind == OMPD_single)
2461     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2462   else if (Kind == OMPD_barrier)
2463     Flags = OMP_IDENT_BARRIER_EXPL;
2464   else
2465     Flags = OMP_IDENT_BARRIER_IMPL;
2466   return Flags;
2467 }
2468 
2469 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2470     CodeGenFunction &CGF, const OMPLoopDirective &S,
2471     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2472   // Check if the loop directive is actually a doacross loop directive. In this
2473   // case choose static, 1 schedule.
2474   if (llvm::any_of(
2475           S.getClausesOfKind<OMPOrderedClause>(),
2476           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2477     ScheduleKind = OMPC_SCHEDULE_static;
2478     // Chunk size is 1 in this case.
2479     llvm::APInt ChunkSize(32, 1);
2480     ChunkExpr = IntegerLiteral::Create(
2481         CGF.getContext(), ChunkSize,
2482         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2483         SourceLocation());
2484   }
2485 }
2486 
2487 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2488                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2489                                       bool ForceSimpleCall) {
2490   // Check if we should use the OMPBuilder
2491   auto *OMPRegionInfo =
2492       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2493   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2494     CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
2495         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2496     return;
2497   }
2498 
2499   if (!CGF.HaveInsertPoint())
2500     return;
2501   // Build call __kmpc_cancel_barrier(loc, thread_id);
2502   // Build call __kmpc_barrier(loc, thread_id);
2503   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2504   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2505   // thread_id);
2506   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2507                          getThreadID(CGF, Loc)};
2508   if (OMPRegionInfo) {
2509     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2510       llvm::Value *Result = CGF.EmitRuntimeCall(
2511           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2512                                                 OMPRTL___kmpc_cancel_barrier),
2513           Args);
2514       if (EmitChecks) {
2515         // if (__kmpc_cancel_barrier()) {
2516         //   exit from construct;
2517         // }
2518         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2519         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2520         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2521         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2522         CGF.EmitBlock(ExitBB);
2523         //   exit from construct;
2524         CodeGenFunction::JumpDest CancelDestination =
2525             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2526         CGF.EmitBranchThroughCleanup(CancelDestination);
2527         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2528       }
2529       return;
2530     }
2531   }
2532   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2533                           CGM.getModule(), OMPRTL___kmpc_barrier),
2534                       Args);
2535 }
2536 
2537 /// Map the OpenMP loop schedule to the runtime enumeration.
2538 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2539                                           bool Chunked, bool Ordered) {
2540   switch (ScheduleKind) {
2541   case OMPC_SCHEDULE_static:
2542     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2543                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2544   case OMPC_SCHEDULE_dynamic:
2545     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2546   case OMPC_SCHEDULE_guided:
2547     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2548   case OMPC_SCHEDULE_runtime:
2549     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2550   case OMPC_SCHEDULE_auto:
2551     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2552   case OMPC_SCHEDULE_unknown:
2553     assert(!Chunked && "chunk was specified but schedule kind not known");
2554     return Ordered ? OMP_ord_static : OMP_sch_static;
2555   }
2556   llvm_unreachable("Unexpected runtime schedule");
2557 }
2558 
2559 /// Map the OpenMP distribute schedule to the runtime enumeration.
2560 static OpenMPSchedType
2561 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2562   // only static is allowed for dist_schedule
2563   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2564 }
2565 
2566 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2567                                          bool Chunked) const {
2568   OpenMPSchedType Schedule =
2569       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2570   return Schedule == OMP_sch_static;
2571 }
2572 
2573 bool CGOpenMPRuntime::isStaticNonchunked(
2574     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2575   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2576   return Schedule == OMP_dist_sch_static;
2577 }
2578 
2579 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2580                                       bool Chunked) const {
2581   OpenMPSchedType Schedule =
2582       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2583   return Schedule == OMP_sch_static_chunked;
2584 }
2585 
2586 bool CGOpenMPRuntime::isStaticChunked(
2587     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2588   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2589   return Schedule == OMP_dist_sch_static_chunked;
2590 }
2591 
2592 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2593   OpenMPSchedType Schedule =
2594       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2595   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2596   return Schedule != OMP_sch_static;
2597 }
2598 
2599 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2600                                   OpenMPScheduleClauseModifier M1,
2601                                   OpenMPScheduleClauseModifier M2) {
2602   int Modifier = 0;
2603   switch (M1) {
2604   case OMPC_SCHEDULE_MODIFIER_monotonic:
2605     Modifier = OMP_sch_modifier_monotonic;
2606     break;
2607   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2608     Modifier = OMP_sch_modifier_nonmonotonic;
2609     break;
2610   case OMPC_SCHEDULE_MODIFIER_simd:
2611     if (Schedule == OMP_sch_static_chunked)
2612       Schedule = OMP_sch_static_balanced_chunked;
2613     break;
2614   case OMPC_SCHEDULE_MODIFIER_last:
2615   case OMPC_SCHEDULE_MODIFIER_unknown:
2616     break;
2617   }
2618   switch (M2) {
2619   case OMPC_SCHEDULE_MODIFIER_monotonic:
2620     Modifier = OMP_sch_modifier_monotonic;
2621     break;
2622   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2623     Modifier = OMP_sch_modifier_nonmonotonic;
2624     break;
2625   case OMPC_SCHEDULE_MODIFIER_simd:
2626     if (Schedule == OMP_sch_static_chunked)
2627       Schedule = OMP_sch_static_balanced_chunked;
2628     break;
2629   case OMPC_SCHEDULE_MODIFIER_last:
2630   case OMPC_SCHEDULE_MODIFIER_unknown:
2631     break;
2632   }
2633   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2634   // If the static schedule kind is specified or if the ordered clause is
2635   // specified, and if the nonmonotonic modifier is not specified, the effect is
2636   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2637   // modifier is specified, the effect is as if the nonmonotonic modifier is
2638   // specified.
2639   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2640     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2641           Schedule == OMP_sch_static_balanced_chunked ||
2642           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2643           Schedule == OMP_dist_sch_static_chunked ||
2644           Schedule == OMP_dist_sch_static))
2645       Modifier = OMP_sch_modifier_nonmonotonic;
2646   }
2647   return Schedule | Modifier;
2648 }
2649 
2650 void CGOpenMPRuntime::emitForDispatchInit(
2651     CodeGenFunction &CGF, SourceLocation Loc,
2652     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2653     bool Ordered, const DispatchRTInput &DispatchValues) {
2654   if (!CGF.HaveInsertPoint())
2655     return;
2656   OpenMPSchedType Schedule = getRuntimeSchedule(
2657       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2658   assert(Ordered ||
2659          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2660           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2661           Schedule != OMP_sch_static_balanced_chunked));
2662   // Call __kmpc_dispatch_init(
2663   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2664   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2665   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2666 
2667   // If the Chunk was not specified in the clause - use default value 1.
2668   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2669                                             : CGF.Builder.getIntN(IVSize, 1);
2670   llvm::Value *Args[] = {
2671       emitUpdateLocation(CGF, Loc),
2672       getThreadID(CGF, Loc),
2673       CGF.Builder.getInt32(addMonoNonMonoModifier(
2674           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2675       DispatchValues.LB,                                     // Lower
2676       DispatchValues.UB,                                     // Upper
2677       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2678       Chunk                                                  // Chunk
2679   };
2680   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2681 }
2682 
2683 static void emitForStaticInitCall(
2684     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2685     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2686     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2687     const CGOpenMPRuntime::StaticRTInput &Values) {
2688   if (!CGF.HaveInsertPoint())
2689     return;
2690 
2691   assert(!Values.Ordered);
2692   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2693          Schedule == OMP_sch_static_balanced_chunked ||
2694          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2695          Schedule == OMP_dist_sch_static ||
2696          Schedule == OMP_dist_sch_static_chunked);
2697 
2698   // Call __kmpc_for_static_init(
2699   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2700   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2701   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2702   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2703   llvm::Value *Chunk = Values.Chunk;
2704   if (Chunk == nullptr) {
2705     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2706             Schedule == OMP_dist_sch_static) &&
2707            "expected static non-chunked schedule");
2708     // If the Chunk was not specified in the clause - use default value 1.
2709     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2710   } else {
2711     assert((Schedule == OMP_sch_static_chunked ||
2712             Schedule == OMP_sch_static_balanced_chunked ||
2713             Schedule == OMP_ord_static_chunked ||
2714             Schedule == OMP_dist_sch_static_chunked) &&
2715            "expected static chunked schedule");
2716   }
2717   llvm::Value *Args[] = {
2718       UpdateLocation,
2719       ThreadId,
2720       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2721                                                   M2)), // Schedule type
2722       Values.IL.getPointer(),                           // &isLastIter
2723       Values.LB.getPointer(),                           // &LB
2724       Values.UB.getPointer(),                           // &UB
2725       Values.ST.getPointer(),                           // &Stride
2726       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2727       Chunk                                             // Chunk
2728   };
2729   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2730 }
2731 
2732 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2733                                         SourceLocation Loc,
2734                                         OpenMPDirectiveKind DKind,
2735                                         const OpenMPScheduleTy &ScheduleKind,
2736                                         const StaticRTInput &Values) {
2737   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2738       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2739   assert(isOpenMPWorksharingDirective(DKind) &&
2740          "Expected loop-based or sections-based directive.");
2741   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2742                                              isOpenMPLoopDirective(DKind)
2743                                                  ? OMP_IDENT_WORK_LOOP
2744                                                  : OMP_IDENT_WORK_SECTIONS);
2745   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2746   llvm::FunctionCallee StaticInitFunction =
2747       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2748   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2749   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2750                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2751 }
2752 
2753 void CGOpenMPRuntime::emitDistributeStaticInit(
2754     CodeGenFunction &CGF, SourceLocation Loc,
2755     OpenMPDistScheduleClauseKind SchedKind,
2756     const CGOpenMPRuntime::StaticRTInput &Values) {
2757   OpenMPSchedType ScheduleNum =
2758       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2759   llvm::Value *UpdatedLocation =
2760       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2761   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2762   llvm::FunctionCallee StaticInitFunction =
2763       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2764   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2765                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2766                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2767 }
2768 
2769 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2770                                           SourceLocation Loc,
2771                                           OpenMPDirectiveKind DKind) {
2772   if (!CGF.HaveInsertPoint())
2773     return;
2774   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2775   llvm::Value *Args[] = {
2776       emitUpdateLocation(CGF, Loc,
2777                          isOpenMPDistributeDirective(DKind)
2778                              ? OMP_IDENT_WORK_DISTRIBUTE
2779                              : isOpenMPLoopDirective(DKind)
2780                                    ? OMP_IDENT_WORK_LOOP
2781                                    : OMP_IDENT_WORK_SECTIONS),
2782       getThreadID(CGF, Loc)};
2783   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2784   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2785                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2786                       Args);
2787 }
2788 
2789 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2790                                                  SourceLocation Loc,
2791                                                  unsigned IVSize,
2792                                                  bool IVSigned) {
2793   if (!CGF.HaveInsertPoint())
2794     return;
2795   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2796   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2797   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2798 }
2799 
2800 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2801                                           SourceLocation Loc, unsigned IVSize,
2802                                           bool IVSigned, Address IL,
2803                                           Address LB, Address UB,
2804                                           Address ST) {
2805   // Call __kmpc_dispatch_next(
2806   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2807   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2808   //          kmp_int[32|64] *p_stride);
2809   llvm::Value *Args[] = {
2810       emitUpdateLocation(CGF, Loc),
2811       getThreadID(CGF, Loc),
2812       IL.getPointer(), // &isLastIter
2813       LB.getPointer(), // &Lower
2814       UB.getPointer(), // &Upper
2815       ST.getPointer()  // &Stride
2816   };
2817   llvm::Value *Call =
2818       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2819   return CGF.EmitScalarConversion(
2820       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2821       CGF.getContext().BoolTy, Loc);
2822 }
2823 
2824 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2825                                            llvm::Value *NumThreads,
2826                                            SourceLocation Loc) {
2827   if (!CGF.HaveInsertPoint())
2828     return;
2829   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2830   llvm::Value *Args[] = {
2831       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2832       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2833   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2834                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2835                       Args);
2836 }
2837 
2838 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2839                                          ProcBindKind ProcBind,
2840                                          SourceLocation Loc) {
2841   if (!CGF.HaveInsertPoint())
2842     return;
2843   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2844   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2845   llvm::Value *Args[] = {
2846       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2847       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2848   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2849                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2850                       Args);
2851 }
2852 
2853 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2854                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2855   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2856     OMPBuilder.CreateFlush(CGF.Builder);
2857   } else {
2858     if (!CGF.HaveInsertPoint())
2859       return;
2860     // Build call void __kmpc_flush(ident_t *loc)
2861     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862                             CGM.getModule(), OMPRTL___kmpc_flush),
2863                         emitUpdateLocation(CGF, Loc));
2864   }
2865 }
2866 
namespace {
/// Field indexes of the kmp_task_t record, in declaration order.
enum KmpTaskTFields {
  /// Pointer to the shared variables.
  KmpTaskTShareds = 0,
  /// Task entry routine.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// First compiler-data slot: routine that calls the destructors of the
  /// private variables.
  Data1,
  /// Second compiler-data slot: task priority.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
2892 
2893 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2894   return OffloadEntriesTargetRegion.empty() &&
2895          OffloadEntriesDeviceGlobalVar.empty();
2896 }
2897 
2898 /// Initialize target region entry.
2899 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2900     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2901                                     StringRef ParentName, unsigned LineNum,
2902                                     unsigned Order) {
2903   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2904                                              "only required for the device "
2905                                              "code generation.");
2906   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2907       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2908                                    OMPTargetRegionEntryTargetRegion);
2909   ++OffloadingEntriesNum;
2910 }
2911 
2912 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2913     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2914                                   StringRef ParentName, unsigned LineNum,
2915                                   llvm::Constant *Addr, llvm::Constant *ID,
2916                                   OMPTargetRegionEntryKind Flags) {
2917   // If we are emitting code for a target, the entry is already initialized,
2918   // only has to be registered.
2919   if (CGM.getLangOpts().OpenMPIsDevice) {
2920     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2921       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2922           DiagnosticsEngine::Error,
2923           "Unable to find target region on line '%0' in the device code.");
2924       CGM.getDiags().Report(DiagID) << LineNum;
2925       return;
2926     }
2927     auto &Entry =
2928         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2929     assert(Entry.isValid() && "Entry not initialized!");
2930     Entry.setAddress(Addr);
2931     Entry.setID(ID);
2932     Entry.setFlags(Flags);
2933   } else {
2934     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2935     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2936     ++OffloadingEntriesNum;
2937   }
2938 }
2939 
2940 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2941     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2942     unsigned LineNum) const {
2943   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2944   if (PerDevice == OffloadEntriesTargetRegion.end())
2945     return false;
2946   auto PerFile = PerDevice->second.find(FileID);
2947   if (PerFile == PerDevice->second.end())
2948     return false;
2949   auto PerParentName = PerFile->second.find(ParentName);
2950   if (PerParentName == PerFile->second.end())
2951     return false;
2952   auto PerLine = PerParentName->second.find(LineNum);
2953   if (PerLine == PerParentName->second.end())
2954     return false;
2955   // Fail if this entry is already registered.
2956   if (PerLine->second.getAddress() || PerLine->second.getID())
2957     return false;
2958   return true;
2959 }
2960 
2961 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2962     const OffloadTargetRegionEntryInfoActTy &Action) {
2963   // Scan all target region entries and perform the provided action.
2964   for (const auto &D : OffloadEntriesTargetRegion)
2965     for (const auto &F : D.second)
2966       for (const auto &P : F.second)
2967         for (const auto &L : P.second)
2968           Action(D.first, F.first, P.first(), L.first, L.second);
2969 }
2970 
2971 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2972     initializeDeviceGlobalVarEntryInfo(StringRef Name,
2973                                        OMPTargetGlobalVarEntryKind Flags,
2974                                        unsigned Order) {
2975   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2976                                              "only required for the device "
2977                                              "code generation.");
2978   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
2979   ++OffloadingEntriesNum;
2980 }
2981 
2982 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2983     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
2984                                      CharUnits VarSize,
2985                                      OMPTargetGlobalVarEntryKind Flags,
2986                                      llvm::GlobalValue::LinkageTypes Linkage) {
2987   if (CGM.getLangOpts().OpenMPIsDevice) {
2988     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
2989     assert(Entry.isValid() && Entry.getFlags() == Flags &&
2990            "Entry not initialized!");
2991     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
2992            "Resetting with the new address.");
2993     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
2994       if (Entry.getVarSize().isZero()) {
2995         Entry.setVarSize(VarSize);
2996         Entry.setLinkage(Linkage);
2997       }
2998       return;
2999     }
3000     Entry.setVarSize(VarSize);
3001     Entry.setLinkage(Linkage);
3002     Entry.setAddress(Addr);
3003   } else {
3004     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3005       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3006       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3007              "Entry not initialized!");
3008       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3009              "Resetting with the new address.");
3010       if (Entry.getVarSize().isZero()) {
3011         Entry.setVarSize(VarSize);
3012         Entry.setLinkage(Linkage);
3013       }
3014       return;
3015     }
3016     OffloadEntriesDeviceGlobalVar.try_emplace(
3017         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3018     ++OffloadingEntriesNum;
3019   }
3020 }
3021 
3022 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3023     actOnDeviceGlobalVarEntriesInfo(
3024         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3025   // Scan all target region entries and perform the provided action.
3026   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3027     Action(E.getKey(), E.getValue());
3028 }
3029 
3030 void CGOpenMPRuntime::createOffloadEntry(
3031     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3032     llvm::GlobalValue::LinkageTypes Linkage) {
3033   StringRef Name = Addr->getName();
3034   llvm::Module &M = CGM.getModule();
3035   llvm::LLVMContext &C = M.getContext();
3036 
3037   // Create constant string with the name.
3038   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3039 
3040   std::string StringName = getName({"omp_offloading", "entry_name"});
3041   auto *Str = new llvm::GlobalVariable(
3042       M, StrPtrInit->getType(), /*isConstant=*/true,
3043       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3044   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3045 
3046   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3047                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3048                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3049                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3050                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3051   std::string EntryName = getName({"omp_offloading", "entry", ""});
3052   llvm::GlobalVariable *Entry = createGlobalStruct(
3053       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3054       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3055 
3056   // The entry has to be created in the section the linker expects it to be.
3057   Entry->setSection("omp_offloading_entries");
3058 }
3059 
3060 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3061   // Emit the offloading entries and metadata so that the device codegen side
3062   // can easily figure out what to emit. The produced metadata looks like
3063   // this:
3064   //
3065   // !omp_offload.info = !{!1, ...}
3066   //
3067   // Right now we only generate metadata for function that contain target
3068   // regions.
3069 
3070   // If we are in simd mode or there are no entries, we don't need to do
3071   // anything.
3072   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3073     return;
3074 
3075   llvm::Module &M = CGM.getModule();
3076   llvm::LLVMContext &C = M.getContext();
3077   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3078                          SourceLocation, StringRef>,
3079               16>
3080       OrderedEntries(OffloadEntriesInfoManager.size());
3081   llvm::SmallVector<StringRef, 16> ParentFunctions(
3082       OffloadEntriesInfoManager.size());
3083 
3084   // Auxiliary methods to create metadata values and strings.
3085   auto &&GetMDInt = [this](unsigned V) {
3086     return llvm::ConstantAsMetadata::get(
3087         llvm::ConstantInt::get(CGM.Int32Ty, V));
3088   };
3089 
3090   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3091 
3092   // Create the offloading info metadata node.
3093   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3094 
3095   // Create function that emits metadata for each target region entry;
3096   auto &&TargetRegionMetadataEmitter =
3097       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3098        &GetMDString](
3099           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3100           unsigned Line,
3101           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3102         // Generate metadata for target regions. Each entry of this metadata
3103         // contains:
3104         // - Entry 0 -> Kind of this type of metadata (0).
3105         // - Entry 1 -> Device ID of the file where the entry was identified.
3106         // - Entry 2 -> File ID of the file where the entry was identified.
3107         // - Entry 3 -> Mangled name of the function where the entry was
3108         // identified.
3109         // - Entry 4 -> Line in the file where the entry was identified.
3110         // - Entry 5 -> Order the entry was created.
3111         // The first element of the metadata node is the kind.
3112         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3113                                  GetMDInt(FileID),      GetMDString(ParentName),
3114                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3115 
3116         SourceLocation Loc;
3117         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3118                   E = CGM.getContext().getSourceManager().fileinfo_end();
3119              I != E; ++I) {
3120           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3121               I->getFirst()->getUniqueID().getFile() == FileID) {
3122             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3123                 I->getFirst(), Line, 1);
3124             break;
3125           }
3126         }
3127         // Save this entry in the right position of the ordered entries array.
3128         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3129         ParentFunctions[E.getOrder()] = ParentName;
3130 
3131         // Add metadata to the named metadata node.
3132         MD->addOperand(llvm::MDNode::get(C, Ops));
3133       };
3134 
3135   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3136       TargetRegionMetadataEmitter);
3137 
3138   // Create function that emits metadata for each device global variable entry;
3139   auto &&DeviceGlobalVarMetadataEmitter =
3140       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3141        MD](StringRef MangledName,
3142            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3143                &E) {
3144         // Generate metadata for global variables. Each entry of this metadata
3145         // contains:
3146         // - Entry 0 -> Kind of this type of metadata (1).
3147         // - Entry 1 -> Mangled name of the variable.
3148         // - Entry 2 -> Declare target kind.
3149         // - Entry 3 -> Order the entry was created.
3150         // The first element of the metadata node is the kind.
3151         llvm::Metadata *Ops[] = {
3152             GetMDInt(E.getKind()), GetMDString(MangledName),
3153             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3154 
3155         // Save this entry in the right position of the ordered entries array.
3156         OrderedEntries[E.getOrder()] =
3157             std::make_tuple(&E, SourceLocation(), MangledName);
3158 
3159         // Add metadata to the named metadata node.
3160         MD->addOperand(llvm::MDNode::get(C, Ops));
3161       };
3162 
3163   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3164       DeviceGlobalVarMetadataEmitter);
3165 
3166   for (const auto &E : OrderedEntries) {
3167     assert(std::get<0>(E) && "All ordered entries must exist!");
3168     if (const auto *CE =
3169             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3170                 std::get<0>(E))) {
3171       if (!CE->getID() || !CE->getAddress()) {
3172         // Do not blame the entry if the parent funtion is not emitted.
3173         StringRef FnName = ParentFunctions[CE->getOrder()];
3174         if (!CGM.GetGlobalValue(FnName))
3175           continue;
3176         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3177             DiagnosticsEngine::Error,
3178             "Offloading entry for target region in %0 is incorrect: either the "
3179             "address or the ID is invalid.");
3180         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3181         continue;
3182       }
3183       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3184                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3185     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3186                                              OffloadEntryInfoDeviceGlobalVar>(
3187                    std::get<0>(E))) {
3188       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3189           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3190               CE->getFlags());
3191       switch (Flags) {
3192       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3193         if (CGM.getLangOpts().OpenMPIsDevice &&
3194             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3195           continue;
3196         if (!CE->getAddress()) {
3197           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3198               DiagnosticsEngine::Error, "Offloading entry for declare target "
3199                                         "variable %0 is incorrect: the "
3200                                         "address is invalid.");
3201           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3202           continue;
3203         }
3204         // The vaiable has no definition - no need to add the entry.
3205         if (CE->getVarSize().isZero())
3206           continue;
3207         break;
3208       }
3209       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3210         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3211                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3212                "Declaret target link address is set.");
3213         if (CGM.getLangOpts().OpenMPIsDevice)
3214           continue;
3215         if (!CE->getAddress()) {
3216           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3217               DiagnosticsEngine::Error,
3218               "Offloading entry for declare target variable is incorrect: the "
3219               "address is invalid.");
3220           CGM.getDiags().Report(DiagID);
3221           continue;
3222         }
3223         break;
3224       }
3225       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3226                          CE->getVarSize().getQuantity(), Flags,
3227                          CE->getLinkage());
3228     } else {
3229       llvm_unreachable("Unsupported entry kind.");
3230     }
3231   }
3232 }
3233 
3234 /// Loads all the offload entries information from the host IR
3235 /// metadata.
3236 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3237   // If we are in target mode, load the metadata from the host IR. This code has
3238   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3239 
3240   if (!CGM.getLangOpts().OpenMPIsDevice)
3241     return;
3242 
3243   if (CGM.getLangOpts().OMPHostIRFile.empty())
3244     return;
3245 
3246   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3247   if (auto EC = Buf.getError()) {
3248     CGM.getDiags().Report(diag::err_cannot_open_file)
3249         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3250     return;
3251   }
3252 
3253   llvm::LLVMContext C;
3254   auto ME = expectedToErrorOrAndEmitErrors(
3255       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3256 
3257   if (auto EC = ME.getError()) {
3258     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3259         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3260     CGM.getDiags().Report(DiagID)
3261         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3262     return;
3263   }
3264 
3265   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3266   if (!MD)
3267     return;
3268 
3269   for (llvm::MDNode *MN : MD->operands()) {
3270     auto &&GetMDInt = [MN](unsigned Idx) {
3271       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3272       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3273     };
3274 
3275     auto &&GetMDString = [MN](unsigned Idx) {
3276       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3277       return V->getString();
3278     };
3279 
3280     switch (GetMDInt(0)) {
3281     default:
3282       llvm_unreachable("Unexpected metadata!");
3283       break;
3284     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3285         OffloadingEntryInfoTargetRegion:
3286       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3287           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3288           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3289           /*Order=*/GetMDInt(5));
3290       break;
3291     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3292         OffloadingEntryInfoDeviceGlobalVar:
3293       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3294           /*MangledName=*/GetMDString(1),
3295           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3296               /*Flags=*/GetMDInt(2)),
3297           /*Order=*/GetMDInt(3));
3298       break;
3299     }
3300   }
3301 }
3302 
3303 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3304   if (!KmpRoutineEntryPtrTy) {
3305     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3306     ASTContext &C = CGM.getContext();
3307     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3308     FunctionProtoType::ExtProtoInfo EPI;
3309     KmpRoutineEntryPtrQTy = C.getPointerType(
3310         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3311     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3312   }
3313 }
3314 
3315 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3316   // Make sure the type of the entry is already created. This is the type we
3317   // have to create:
3318   // struct __tgt_offload_entry{
3319   //   void      *addr;       // Pointer to the offload entry info.
3320   //                          // (function or global)
3321   //   char      *name;       // Name of the function or global.
3322   //   size_t     size;       // Size of the entry info (0 if it a function).
3323   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3324   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3325   // };
3326   if (TgtOffloadEntryQTy.isNull()) {
3327     ASTContext &C = CGM.getContext();
3328     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3329     RD->startDefinition();
3330     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3331     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3332     addFieldToRecordDecl(C, RD, C.getSizeType());
3333     addFieldToRecordDecl(
3334         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3335     addFieldToRecordDecl(
3336         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3337     RD->completeDefinition();
3338     RD->addAttr(PackedAttr::CreateImplicit(C));
3339     TgtOffloadEntryQTy = C.getRecordType(RD);
3340   }
3341   return TgtOffloadEntryQTy;
3342 }
3343 
3344 namespace {
3345 struct PrivateHelpersTy {
3346   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3347                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3348       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3349         PrivateElemInit(PrivateElemInit) {}
3350   const Expr *OriginalRef = nullptr;
3351   const VarDecl *Original = nullptr;
3352   const VarDecl *PrivateCopy = nullptr;
3353   const VarDecl *PrivateElemInit = nullptr;
3354 };
3355 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3356 } // anonymous namespace
3357 
3358 static RecordDecl *
3359 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3360   if (!Privates.empty()) {
3361     ASTContext &C = CGM.getContext();
3362     // Build struct .kmp_privates_t. {
3363     //         /*  private vars  */
3364     //       };
3365     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3366     RD->startDefinition();
3367     for (const auto &Pair : Privates) {
3368       const VarDecl *VD = Pair.second.Original;
3369       QualType Type = VD->getType().getNonReferenceType();
3370       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3371       if (VD->hasAttrs()) {
3372         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3373              E(VD->getAttrs().end());
3374              I != E; ++I)
3375           FD->addAttr(*I);
3376       }
3377     }
3378     RD->completeDefinition();
3379     return RD;
3380   }
3381   return nullptr;
3382 }
3383 
3384 static RecordDecl *
3385 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3386                          QualType KmpInt32Ty,
3387                          QualType KmpRoutineEntryPointerQTy) {
3388   ASTContext &C = CGM.getContext();
3389   // Build struct kmp_task_t {
3390   //         void *              shareds;
3391   //         kmp_routine_entry_t routine;
3392   //         kmp_int32           part_id;
3393   //         kmp_cmplrdata_t data1;
3394   //         kmp_cmplrdata_t data2;
3395   // For taskloops additional fields:
3396   //         kmp_uint64          lb;
3397   //         kmp_uint64          ub;
3398   //         kmp_int64           st;
3399   //         kmp_int32           liter;
3400   //         void *              reductions;
3401   //       };
3402   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3403   UD->startDefinition();
3404   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3405   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3406   UD->completeDefinition();
3407   QualType KmpCmplrdataTy = C.getRecordType(UD);
3408   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3409   RD->startDefinition();
3410   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3411   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3412   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3413   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3414   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3415   if (isOpenMPTaskLoopDirective(Kind)) {
3416     QualType KmpUInt64Ty =
3417         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3418     QualType KmpInt64Ty =
3419         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3420     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3421     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3422     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3423     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3424     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3425   }
3426   RD->completeDefinition();
3427   return RD;
3428 }
3429 
3430 static RecordDecl *
3431 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3432                                      ArrayRef<PrivateDataTy> Privates) {
3433   ASTContext &C = CGM.getContext();
3434   // Build struct kmp_task_t_with_privates {
3435   //         kmp_task_t task_data;
3436   //         .kmp_privates_t. privates;
3437   //       };
3438   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3439   RD->startDefinition();
3440   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3441   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3442     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3443   RD->completeDefinition();
3444   return RD;
3445 }
3446 
3447 /// Emit a proxy function which accepts kmp_task_t as the second
3448 /// argument.
3449 /// \code
3450 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3451 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3452 ///   For taskloops:
3453 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3454 ///   tt->reductions, tt->shareds);
3455 ///   return 0;
3456 /// }
3457 /// \endcode
3458 static llvm::Function *
3459 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3460                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3461                       QualType KmpTaskTWithPrivatesPtrQTy,
3462                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3463                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3464                       llvm::Value *TaskPrivatesMap) {
3465   ASTContext &C = CGM.getContext();
3466   FunctionArgList Args;
3467   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3468                             ImplicitParamDecl::Other);
3469   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3470                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3471                                 ImplicitParamDecl::Other);
3472   Args.push_back(&GtidArg);
3473   Args.push_back(&TaskTypeArg);
3474   const auto &TaskEntryFnInfo =
3475       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3476   llvm::FunctionType *TaskEntryTy =
3477       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3478   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3479   auto *TaskEntry = llvm::Function::Create(
3480       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3481   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3482   TaskEntry->setDoesNotRecurse();
3483   CodeGenFunction CGF(CGM);
3484   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3485                     Loc, Loc);
3486 
3487   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3488   // tt,
3489   // For taskloops:
3490   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3491   // tt->task_data.shareds);
3492   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3493       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3494   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3495       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3496       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3497   const auto *KmpTaskTWithPrivatesQTyRD =
3498       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3499   LValue Base =
3500       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3501   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3502   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3503   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3504   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3505 
3506   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3507   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3508   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3509       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3510       CGF.ConvertTypeForMem(SharedsPtrTy));
3511 
3512   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3513   llvm::Value *PrivatesParam;
3514   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3515     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3516     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3517         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3518   } else {
3519     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3520   }
3521 
3522   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3523                                TaskPrivatesMap,
3524                                CGF.Builder
3525                                    .CreatePointerBitCastOrAddrSpaceCast(
3526                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3527                                    .getPointer()};
3528   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3529                                           std::end(CommonArgs));
3530   if (isOpenMPTaskLoopDirective(Kind)) {
3531     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3532     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3533     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3534     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3535     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3536     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3537     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3538     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3539     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3540     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3541     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3542     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3543     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3544     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3545     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3546     CallArgs.push_back(LBParam);
3547     CallArgs.push_back(UBParam);
3548     CallArgs.push_back(StParam);
3549     CallArgs.push_back(LIParam);
3550     CallArgs.push_back(RParam);
3551   }
3552   CallArgs.push_back(SharedsParam);
3553 
3554   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3555                                                   CallArgs);
3556   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3557                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3558   CGF.FinishFunction();
3559   return TaskEntry;
3560 }
3561 
3562 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3563                                             SourceLocation Loc,
3564                                             QualType KmpInt32Ty,
3565                                             QualType KmpTaskTWithPrivatesPtrQTy,
3566                                             QualType KmpTaskTWithPrivatesQTy) {
3567   ASTContext &C = CGM.getContext();
3568   FunctionArgList Args;
3569   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3570                             ImplicitParamDecl::Other);
3571   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3572                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3573                                 ImplicitParamDecl::Other);
3574   Args.push_back(&GtidArg);
3575   Args.push_back(&TaskTypeArg);
3576   const auto &DestructorFnInfo =
3577       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3578   llvm::FunctionType *DestructorFnTy =
3579       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3580   std::string Name =
3581       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3582   auto *DestructorFn =
3583       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3584                              Name, &CGM.getModule());
3585   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3586                                     DestructorFnInfo);
3587   DestructorFn->setDoesNotRecurse();
3588   CodeGenFunction CGF(CGM);
3589   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3590                     Args, Loc, Loc);
3591 
3592   LValue Base = CGF.EmitLoadOfPointerLValue(
3593       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3594       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3595   const auto *KmpTaskTWithPrivatesQTyRD =
3596       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3597   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3598   Base = CGF.EmitLValueForField(Base, *FI);
3599   for (const auto *Field :
3600        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3601     if (QualType::DestructionKind DtorKind =
3602             Field->getType().isDestructedType()) {
3603       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3604       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3605     }
3606   }
3607   CGF.FinishFunction();
3608   return DestructorFn;
3609 }
3610 
3611 /// Emit a privates mapping function for correct handling of private and
3612 /// firstprivate variables.
3613 /// \code
3614 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3615 /// **noalias priv1,...,  <tyn> **noalias privn) {
3616 ///   *priv1 = &.privates.priv1;
3617 ///   ...;
3618 ///   *privn = &.privates.privn;
3619 /// }
3620 /// \endcode
3621 static llvm::Value *
3622 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3623                                ArrayRef<const Expr *> PrivateVars,
3624                                ArrayRef<const Expr *> FirstprivateVars,
3625                                ArrayRef<const Expr *> LastprivateVars,
3626                                QualType PrivatesQTy,
3627                                ArrayRef<PrivateDataTy> Privates) {
3628   ASTContext &C = CGM.getContext();
3629   FunctionArgList Args;
3630   ImplicitParamDecl TaskPrivatesArg(
3631       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3632       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3633       ImplicitParamDecl::Other);
3634   Args.push_back(&TaskPrivatesArg);
3635   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3636   unsigned Counter = 1;
3637   for (const Expr *E : PrivateVars) {
3638     Args.push_back(ImplicitParamDecl::Create(
3639         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3640         C.getPointerType(C.getPointerType(E->getType()))
3641             .withConst()
3642             .withRestrict(),
3643         ImplicitParamDecl::Other));
3644     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3645     PrivateVarsPos[VD] = Counter;
3646     ++Counter;
3647   }
3648   for (const Expr *E : FirstprivateVars) {
3649     Args.push_back(ImplicitParamDecl::Create(
3650         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3651         C.getPointerType(C.getPointerType(E->getType()))
3652             .withConst()
3653             .withRestrict(),
3654         ImplicitParamDecl::Other));
3655     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3656     PrivateVarsPos[VD] = Counter;
3657     ++Counter;
3658   }
3659   for (const Expr *E : LastprivateVars) {
3660     Args.push_back(ImplicitParamDecl::Create(
3661         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3662         C.getPointerType(C.getPointerType(E->getType()))
3663             .withConst()
3664             .withRestrict(),
3665         ImplicitParamDecl::Other));
3666     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3667     PrivateVarsPos[VD] = Counter;
3668     ++Counter;
3669   }
3670   const auto &TaskPrivatesMapFnInfo =
3671       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3672   llvm::FunctionType *TaskPrivatesMapTy =
3673       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3674   std::string Name =
3675       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3676   auto *TaskPrivatesMap = llvm::Function::Create(
3677       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3678       &CGM.getModule());
3679   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3680                                     TaskPrivatesMapFnInfo);
3681   if (CGM.getLangOpts().Optimize) {
3682     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3683     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3684     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3685   }
3686   CodeGenFunction CGF(CGM);
3687   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3688                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3689 
3690   // *privi = &.privates.privi;
3691   LValue Base = CGF.EmitLoadOfPointerLValue(
3692       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3693       TaskPrivatesArg.getType()->castAs<PointerType>());
3694   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3695   Counter = 0;
3696   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3697     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3698     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3699     LValue RefLVal =
3700         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3701     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3702         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3703     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3704     ++Counter;
3705   }
3706   CGF.FinishFunction();
3707   return TaskPrivatesMap;
3708 }
3709 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, accessed through \p TDBase)
/// and emits the initializer of each private copy into its field.
///
/// \param KmpTaskSharedsPtr Address of the shareds block. It is only used to
/// form the source lvalue when (a) this is not a target task, there are
/// firstprivates and \p ForDup is set, or (b) this is a target task and the
/// address is valid.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type the shareds address is cast to.
/// \param ForDup When true, only private copies whose initializer is a
/// non-trivial constructor call are initialized, and firstprivate sources are
/// read from the shareds block instead of being re-evaluated from the
/// original reference expression.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task/taskloop region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself;
  // it is advanced once per entry of Privates, whether or not that entry is
  // initialized.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication case only non-trivial constructor initializers are
    // emitted; otherwise any initializer is emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer refers to a source
      // element that has to be bound to the shared original first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the local address of the original variable.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source from the shareds block, re-wrapping the address
          // with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code emitted inside
          // a block: evaluate the original reference directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          // The inlined-region RAII object is live only while the original
          // reference is evaluated.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the helper element variable to the current source
                  // element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the helper variable to the shared
          // original and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No source element involved: emit the initializer as is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3826 
3827 /// Check if duplication function is required for taskloops.
3828 static bool checkInitIsRequired(CodeGenFunction &CGF,
3829                                 ArrayRef<PrivateDataTy> Privates) {
3830   bool InitRequired = false;
3831   for (const PrivateDataTy &Pair : Privates) {
3832     const VarDecl *VD = Pair.second.PrivateCopy;
3833     const Expr *Init = VD->getAnyInitializer();
3834     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3835                                     !CGF.isTrivialInitializer(Init));
3836     if (InitRequired)
3837       break;
3838   }
3839   return InitRequired;
3840 }
3841 
3842 
3843 /// Emit task_dup function (for initialization of
3844 /// private/firstprivate/lastprivate vars and last_iter flag)
3845 /// \code
3846 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3847 /// lastpriv) {
3848 /// // setup lastprivate flag
3849 ///    task_dst->last = lastpriv;
3850 /// // could be constructor calls here...
3851 /// }
3852 /// \endcode
3853 static llvm::Value *
3854 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3855                     const OMPExecutableDirective &D,
3856                     QualType KmpTaskTWithPrivatesPtrQTy,
3857                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3858                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3859                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3860                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3861   ASTContext &C = CGM.getContext();
3862   FunctionArgList Args;
3863   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3864                            KmpTaskTWithPrivatesPtrQTy,
3865                            ImplicitParamDecl::Other);
3866   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3867                            KmpTaskTWithPrivatesPtrQTy,
3868                            ImplicitParamDecl::Other);
3869   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3870                                 ImplicitParamDecl::Other);
3871   Args.push_back(&DstArg);
3872   Args.push_back(&SrcArg);
3873   Args.push_back(&LastprivArg);
3874   const auto &TaskDupFnInfo =
3875       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3876   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3877   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3878   auto *TaskDup = llvm::Function::Create(
3879       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3880   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3881   TaskDup->setDoesNotRecurse();
3882   CodeGenFunction CGF(CGM);
3883   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3884                     Loc);
3885 
3886   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3887       CGF.GetAddrOfLocalVar(&DstArg),
3888       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3889   // task_dst->liter = lastpriv;
3890   if (WithLastIter) {
3891     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3892     LValue Base = CGF.EmitLValueForField(
3893         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3894     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3895     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3896         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3897     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3898   }
3899 
3900   // Emit initial values for private copies (if any).
3901   assert(!Privates.empty());
3902   Address KmpTaskSharedsPtr = Address::invalid();
3903   if (!Data.FirstprivateVars.empty()) {
3904     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3905         CGF.GetAddrOfLocalVar(&SrcArg),
3906         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3907     LValue Base = CGF.EmitLValueForField(
3908         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3909     KmpTaskSharedsPtr = Address(
3910         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3911                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3912                                                   KmpTaskTShareds)),
3913                              Loc),
3914         CGM.getNaturalTypeAlignment(SharedsTy));
3915   }
3916   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3917                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3918   CGF.FinishFunction();
3919   return TaskDup;
3920 }
3921 
3922 /// Checks if destructor function is required to be generated.
3923 /// \return true if cleanups are required, false otherwise.
3924 static bool
3925 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3926   bool NeedsCleanup = false;
3927   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3928   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3929   for (const FieldDecl *FD : PrivateRD->fields()) {
3930     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3931     if (NeedsCleanup)
3932       break;
3933   }
3934   return NeedsCleanup;
3935 }
3936 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression, the header of
/// a counting loop (counter initialization, "iter.cont" condition block and
/// "iter.body" block); the destructor emits the matching counter increments,
/// back-branches and "iter.exit" blocks in reverse order. Code emitted while
/// an object of this class is alive therefore ends up inside the innermost
/// loop body. When constructed with a null expression the object emits
/// nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the loop condition blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the loop exit blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatize the iterator and counter variables of \p E and emit the loop
  /// headers, outermost iterator first.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // All upper bounds are evaluated up front, before any loop is opened.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Give the iterator variable and its helper counter their own
      // temporaries.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember the destinations so the destructor can close this loop.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Close the loops opened by the constructor, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost loop is marked as finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4015 
/// Computes the address and the size in bytes of the storage designated by
/// \p E.
///
/// - Array shaping expression: the address is the value of the base pointer
///   and the size is the pointee size multiplied by every dimension.
/// - Array section (looked up through parentheses and implicit casts): the
///   size is the distance in bytes between the lvalue address of \p E and one
///   element past the address produced by EmitOMPArraySectionExpr with
///   IsLowerBound=false.
/// - Anything else: the lvalue address of \p E and the size of its type.
///
/// \returns the pair {address, size}.
4016 static std::pair<llvm::Value *, llvm::Value *>
4017 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4018   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4019   llvm::Value *Addr;
4020   if (OASE) {
  // A shaping expression has no storage of its own; use the base pointer value.
4021     const Expr *Base = OASE->getBase();
4022     Addr = CGF.EmitScalarExpr(Base);
4023   } else {
4024     Addr = CGF.EmitLValue(E).getPointer(CGF);
4025   }
4026   llvm::Value *SizeVal;
4027   QualType Ty = E->getType();
4028   if (OASE) {
  // size = sizeof(pointee) * dim0 * dim1 * ..., each dimension converted to
  // the size type first.
4029     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4030     for (const Expr *SE : OASE->getDimensions()) {
4031       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4032       Sz = CGF.EmitScalarConversion(
4033           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4034       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4035     }
4036   } else if (const auto *ASE =
4037                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
  // size = (address one element past the upper-bound lvalue) - Addr, computed
  // on integers of the size type.
4038     LValue UpAddrLVal =
4039         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4040     llvm::Value *UpAddr =
4041         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4042     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4043     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4044     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4045   } else {
4046     SizeVal = CGF.getTypeSize(Ty);
4047   }
4048   return std::make_pair(Addr, SizeVal);
4049 }
4050 
4051 /// Builds the implicit record kmp_task_affinity_info_t, if it is not built yet.
///
/// The record gets three unnamed fields, in this order: an intptr-typed field,
/// a size_t-typed field and a 32-bit unsigned field.
/// \param C AST context used to create the record.
/// \param KmpTaskAffinityInfoTy In/out cache for the record type; it is only
/// assigned when it is null on entry.
4052 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4053   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4054   if (KmpTaskAffinityInfoTy.isNull()) {
4055     RecordDecl *KmpAffinityInfoRD =
4056         C.buildImplicitRecord("kmp_task_affinity_info_t");
4057     KmpAffinityInfoRD->startDefinition();
4058     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4059     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4060     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4061     KmpAffinityInfoRD->completeDefinition();
4062     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4063   }
4064 }
4065 
/// Emits the allocation and initialization of a task descriptor for the
/// task-generating directive \p D.
///
/// The steps, in emission order, are: collect the private, firstprivate and
/// lastprivate copies and order them by decreasing alignment; build the
/// kmp_task_t and task-with-privates record types; emit the privates mapping
/// function and the proxy task entry; call __kmpc_omp_task_alloc (or
/// __kmpc_omp_target_task_alloc when \p D has a nowait clause); handle the
/// detach and affinity clauses; copy the shareds into the new task; initialize
/// the privates; and store the destructors function and the priority.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the emitted runtime calls.
/// \param D Directive whose kind and clauses drive the emission.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function.
/// \param SharedsTy Record type holding the shared variables.
/// \param Shareds Address of the shareds record that is copied into the task.
/// \param Data Privatization lists and tied/final/priority settings.
/// \returns the allocated task, the proxy entry, the typed task pointer, the
/// lvalue of the embedded kmp_task_t, its record declaration and, for taskloop
/// directives that need it, the task duplication function.
4066 CGOpenMPRuntime::TaskResultTy
4067 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4068                               const OMPExecutableDirective &D,
4069                               llvm::Function *TaskFunction, QualType SharedsTy,
4070                               Address Shareds, const OMPTaskDataTy &Data) {
4071   ASTContext &C = CGM.getContext();
4072   llvm::SmallVector<PrivateDataTy, 4> Privates;
4073   // Aggregate privates and sort them by the alignment.
4074   const auto *I = Data.PrivateCopies.begin();
4075   for (const Expr *E : Data.PrivateVars) {
4076     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4077     Privates.emplace_back(
4078         C.getDeclAlign(VD),
4079         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4080                          /*PrivateElemInit=*/nullptr));
4081     ++I;
4082   }
4083   I = Data.FirstprivateCopies.begin();
4084   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4085   for (const Expr *E : Data.FirstprivateVars) {
4086     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4087     Privates.emplace_back(
4088         C.getDeclAlign(VD),
4089         PrivateHelpersTy(
4090             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4091             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4092     ++I;
4093     ++IElemInitRef;
4094   }
4095   I = Data.LastprivateCopies.begin();
4096   for (const Expr *E : Data.LastprivateVars) {
4097     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4098     Privates.emplace_back(
4099         C.getDeclAlign(VD),
4100         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4101                          /*PrivateElemInit=*/nullptr));
4102     ++I;
4103   }
  // Largest alignment first; the stable sort keeps the collection order among
  // privates with equal alignment.
4104   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4105     return L.first > R.first;
4106   });
4107   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4108   // Build type kmp_routine_entry_t (if not built yet).
4109   emitKmpRoutineEntryT(KmpInt32Ty);
4110   // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the other task-generating directives each cache
  // their own kmp_task_t record type.
4111   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4112     if (SavedKmpTaskloopTQTy.isNull()) {
4113       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4114           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4115     }
4116     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4117   } else {
4118     assert((D.getDirectiveKind() == OMPD_task ||
4119             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4120             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4121            "Expected taskloop, task or target directive");
4122     if (SavedKmpTaskTQTy.isNull()) {
4123       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4124           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4125     }
4126     KmpTaskTQTy = SavedKmpTaskTQTy;
4127   }
4128   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4129   // Build particular struct kmp_task_t for the given task.
4130   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4131       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4132   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4133   QualType KmpTaskTWithPrivatesPtrQTy =
4134       C.getPointerType(KmpTaskTWithPrivatesQTy);
4135   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4136   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4137       KmpTaskTWithPrivatesTy->getPointerTo();
4138   llvm::Value *KmpTaskTWithPrivatesTySize =
4139       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4140   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4141 
4142   // Emit the privates mapping function (if there are any privates); otherwise
  // pass a null pointer of the type expected by the task function's fourth
  // parameter.
4143   llvm::Value *TaskPrivatesMap = nullptr;
4144   llvm::Type *TaskPrivatesMapTy =
4145       std::next(TaskFunction->arg_begin(), 3)->getType();
4146   if (!Privates.empty()) {
4147     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4148     TaskPrivatesMap = emitTaskPrivateMappingFunction(
4149         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4150         FI->getType(), Privates);
4151     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4152         TaskPrivatesMap, TaskPrivatesMapTy);
4153   } else {
4154     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4155         cast<llvm::PointerType>(TaskPrivatesMapTy));
4156   }
4157   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4158   // kmp_task_t *tt);
4159   llvm::Function *TaskEntry = emitProxyTaskFunction(
4160       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4161       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4162       TaskPrivatesMap);
4163 
4164   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4165   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4166   // kmp_routine_entry_t *task_entry);
4167   // Task flags. Format is taken from
4168   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4169   // description of kmp_tasking_flags struct.
4170   enum {
4171     TiedFlag = 0x1,
4172     FinalFlag = 0x2,
4173     DestructorsFlag = 0x8,
4174     PriorityFlag = 0x20,
4175     DetachableFlag = 0x40,
4176   };
4177   unsigned Flags = Data.Tied ? TiedFlag : 0;
4178   bool NeedsCleanup = false;
4179   if (!Privates.empty()) {
4180     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4181     if (NeedsCleanup)
4182       Flags = Flags | DestructorsFlag;
4183   }
4184   if (Data.Priority.getInt())
4185     Flags = Flags | PriorityFlag;
4186   if (D.hasClausesOfKind<OMPDetachClause>())
4187     Flags = Flags | DetachableFlag;
  // The final flag is selected at run time when Data.Final carries a value
  // pointer and folded to a constant otherwise; the remaining flags are
  // compile-time constants that are or-ed in afterwards.
4188   llvm::Value *TaskFlags =
4189       Data.Final.getPointer()
4190           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4191                                      CGF.Builder.getInt32(FinalFlag),
4192                                      CGF.Builder.getInt32(/*C=*/0))
4193           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4194   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4195   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4196   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4197       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4198       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4199           TaskEntry, KmpRoutineEntryPtrTy)};
4200   llvm::Value *NewTask;
  // With a nowait clause the task is allocated through
  // __kmpc_omp_target_task_alloc, which receives the device id as an extra
  // trailing argument.
4201   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4202     // Check if we have any device clause associated with the directive.
4203     const Expr *Device = nullptr;
4204     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4205       Device = C->getDevice();
4206     // Emit device ID if any otherwise use default value.
4207     llvm::Value *DeviceID;
4208     if (Device)
4209       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4210                                            CGF.Int64Ty, /*isSigned=*/true);
4211     else
4212       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4213     AllocArgs.push_back(DeviceID);
4214     NewTask = CGF.EmitRuntimeCall(
4215         OMPBuilder.getOrCreateRuntimeFunction(
4216             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4217         AllocArgs);
4218   } else {
4219     NewTask =
4220         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4221                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4222                             AllocArgs);
4223   }
4224   // Emit detach clause initialization.
4225   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4226   // task_descriptor);
4227   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4228     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4229     LValue EvtLVal = CGF.EmitLValue(Evt);
4230 
4231     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4232     // int gtid, kmp_task_t *task);
4233     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4234     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4235     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4236     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4237         OMPBuilder.getOrCreateRuntimeFunction(
4238             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4239         {Loc, Tid, NewTask});
4240     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4241                                       Evt->getExprLoc());
4242     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4243   }
4244   // Process affinity clauses.
4245   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4246     // Process list of affinity data.
4247     ASTContext &C = CGM.getContext();
4248     Address AffinitiesArray = Address::invalid();
4249     // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier;
    // NumOfElements is the run-time product of the iterators' Upper helper
    // values and stays null when no clause has an iterator modifier.
4250     llvm::Value *NumOfElements = nullptr;
4251     unsigned NumAffinities = 0;
4252     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4253       if (const Expr *Modifier = C->getModifier()) {
4254         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4255         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4256           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4257           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4258           NumOfElements =
4259               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4260         }
4261       } else {
4262         NumAffinities += C->varlist_size();
4263       }
4264     }
4265     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4266     // Fields ids in kmp_task_affinity_info record. The enumerators are local to
    // this block; `Flags` hides the task-flags variable declared above, which
    // is not referenced inside this block.
4267     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4268 
4269     QualType KmpTaskAffinityInfoArrayTy;
4270     if (NumOfElements) {
      // The element count is only known at run time, so the array is emitted
      // as a variable-length array whose size expression is an opaque value
      // bound to the computed count.
4271       NumOfElements = CGF.Builder.CreateNUWAdd(
4272           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4273       OpaqueValueExpr OVE(
4274           Loc,
4275           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4276           VK_RValue);
4277       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4278                                                     RValue::get(NumOfElements));
4279       KmpTaskAffinityInfoArrayTy =
4280           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4281                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4282       // Properly emit variable-sized array.
4283       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4284                                            ImplicitParamDecl::Other);
4285       CGF.EmitVarDecl(*PD);
4286       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4287       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4288                                                 /*isSigned=*/false);
4289     } else {
      // The element count is a compile-time constant: use a fixed-size
      // temporary array.
4290       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4291           KmpTaskAffinityInfoTy,
4292           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4293           ArrayType::Normal, /*IndexTypeQuals=*/0);
4294       AffinitiesArray =
4295           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4296       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4297       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4298                                              /*isSigned=*/false);
4299     }
4300 
4301     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4302     // Fill array by elements without iterators.
    // NOTE(review): only the base_addr and len fields are stored below (here
    // and in the iterator loop); the flags field of each element is never
    // written in this function - confirm that this is intended.
4303     unsigned Pos = 0;
4304     bool HasIterator = false;
4305     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4306       if (C->getModifier()) {
4307         HasIterator = true;
4308         continue;
4309       }
4310       for (const Expr *E : C->varlists()) {
4311         llvm::Value *Addr;
4312         llvm::Value *Size;
4313         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4314         LValue Base =
4315             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4316                                KmpTaskAffinityInfoTy);
4317         // affs[i].base_addr = &<Affinities[i].second>;
4318         LValue BaseAddrLVal = CGF.EmitLValueForField(
4319             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4320         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4321                               BaseAddrLVal);
4322         // affs[i].len = sizeof(<Affinities[i].second>);
4323         LValue LenLVal = CGF.EmitLValueForField(
4324             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4325         CGF.EmitStoreOfScalar(Size, LenLVal);
4326         ++Pos;
4327       }
4328     }
    // Elements produced under iterators are appended after the constant ones,
    // so the write position becomes a run-time counter that starts at Pos.
4329     LValue PosLVal;
4330     if (HasIterator) {
4331       PosLVal = CGF.MakeAddrLValue(
4332           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4333           C.getSizeType());
4334       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4335     }
4336     // Process elements with iterators.
4337     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4338       const Expr *Modifier = C->getModifier();
4339       if (!Modifier)
4340         continue;
4341       OMPIteratorGeneratorScope IteratorScope(
4342           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4343       for (const Expr *E : C->varlists()) {
4344         llvm::Value *Addr;
4345         llvm::Value *Size;
4346         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4347         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4348         LValue Base = CGF.MakeAddrLValue(
4349             Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4350                     AffinitiesArray.getAlignment()),
4351             KmpTaskAffinityInfoTy);
4352         // affs[i].base_addr = &<Affinities[i].second>;
4353         LValue BaseAddrLVal = CGF.EmitLValueForField(
4354             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4355         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4356                               BaseAddrLVal);
4357         // affs[i].len = sizeof(<Affinities[i].second>);
4358         LValue LenLVal = CGF.EmitLValueForField(
4359             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4360         CGF.EmitStoreOfScalar(Size, LenLVal);
4361         Idx = CGF.Builder.CreateNUWAdd(
4362             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4363         CGF.EmitStoreOfScalar(Idx, PosLVal);
4364       }
4365     }
4366     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4367     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4368     // naffins, kmp_task_affinity_info_t *affin_list);
4369     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4370     llvm::Value *GTid = getThreadID(CGF, Loc);
4371     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4372         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4373     // FIXME: Emit the function and ignore its result for now unless the
4374     // runtime function is properly implemented.
4375     (void)CGF.EmitRuntimeCall(
4376         OMPBuilder.getOrCreateRuntimeFunction(
4377             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4378         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4379   }
  // View the allocated task as the task-with-privates record; its first field
  // is the embedded kmp_task_t (TDBase).
4380   llvm::Value *NewTaskNewTaskTTy =
4381       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4382           NewTask, KmpTaskTWithPrivatesPtrTy);
4383   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4384                                                KmpTaskTWithPrivatesQTy);
4385   LValue TDBase =
4386       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4387   // Fill the data in the resulting kmp_task_t record.
4388   // Copy shareds if there are any.
4389   Address KmpTaskSharedsPtr = Address::invalid();
4390   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4391     KmpTaskSharedsPtr =
4392         Address(CGF.EmitLoadOfScalar(
4393                     CGF.EmitLValueForField(
4394                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4395                                            KmpTaskTShareds)),
4396                     Loc),
4397                 CGM.getNaturalTypeAlignment(SharedsTy));
4398     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4399     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4400     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4401   }
4402   // Emit initial values for private copies (if any).
4403   TaskResultTy Result;
4404   if (!Privates.empty()) {
4405     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4406                      SharedsTy, SharedsPtrTy, Data, Privates,
4407                      /*ForDup=*/false);
    // A task duplication function is only emitted for taskloop directives
    // that have lastprivates or for which checkInitIsRequired reports true.
4408     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4409         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4410       Result.TaskDupFn = emitTaskDupFunction(
4411           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4412           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4413           /*WithLastIter=*/!Data.LastprivateVars.empty());
4414     }
4415   }
4416   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4417   enum { Priority = 0, Destructors = 1 };
4418   // Provide pointer to function with destructors for privates.
4419   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4420   const RecordDecl *KmpCmplrdataUD =
4421       (*FI)->getType()->getAsUnionType()->getDecl();
4422   if (NeedsCleanup) {
4423     llvm::Value *DestructorFn = emitDestructorsFunction(
4424         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4425         KmpTaskTWithPrivatesQTy);
4426     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4427     LValue DestructorsLV = CGF.EmitLValueForField(
4428         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4429     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4430                               DestructorFn, KmpRoutineEntryPtrTy),
4431                           DestructorsLV);
4432   }
4433   // Set priority.
4434   if (Data.Priority.getInt()) {
4435     LValue Data2LV = CGF.EmitLValueForField(
4436         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4437     LValue PriorityLV = CGF.EmitLValueForField(
4438         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4439     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4440   }
4441   Result.NewTask = NewTask;
4442   Result.TaskEntry = TaskEntry;
4443   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4444   Result.TDBase = TDBase;
4445   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4446   return Result;
4447 }
4448 
4449 namespace {
4450 /// Dependence kind for RTL. These are the values stored into the flags field
/// of a kmp_depend_info element; `out` and `inout` dependences both map to
/// DepInOut.
4451 enum RTLDependenceKindTy {
4452   DepIn = 0x01,
4453   DepInOut = 0x3,
4454   DepMutexInOutSet = 0x4
4455 };
4456 /// Fields ids in kmp_depend_info record. The enumerators are used as offsets
/// from field_begin() of the record and therefore follow the order in which
/// the fields are added to it: base address, length, flags.
4457 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4458 } // namespace
4459 
4460 /// Translates internal dependency kind into the runtime kind.
///
/// \param K Kind of the depend clause; only in, out, inout and mutexinoutset
/// are translatable.
/// \returns DepIn for `in`, DepInOut for both `out` and `inout`, and
/// DepMutexInOutSet for `mutexinoutset`. The source, sink, depobj and unknown
/// kinds hit llvm_unreachable.
4461 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4462   RTLDependenceKindTy DepKind;
4463   switch (K) {
4464   case OMPC_DEPEND_in:
4465     DepKind = DepIn;
4466     break;
4467   // Out and InOut dependencies must use the same code.
4468   case OMPC_DEPEND_out:
4469   case OMPC_DEPEND_inout:
4470     DepKind = DepInOut;
4471     break;
4472   case OMPC_DEPEND_mutexinoutset:
4473     DepKind = DepMutexInOutSet;
4474     break;
  // These kinds are not expected to reach this function.
4475   case OMPC_DEPEND_source:
4476   case OMPC_DEPEND_sink:
4477   case OMPC_DEPEND_depobj:
4478   case OMPC_DEPEND_unknown:
4479     llvm_unreachable("Unknown task dependence type");
4480   }
4481   return DepKind;
4482 }
4483 
4484 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
///
/// The record gets three unnamed fields, in this order: an intptr-typed field,
/// a size_t-typed field and a field of the flags type.
/// \param C AST context used to create the types.
/// \param KmpDependInfoTy In/out cache for the record type; it is only
/// assigned when it is null on entry.
/// \param FlagsTy Out parameter, assigned on every call: an unsigned integer
/// type as wide as the context's bool type.
4485 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4486                            QualType &FlagsTy) {
4487   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4488   if (KmpDependInfoTy.isNull()) {
4489     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4490     KmpDependInfoRD->startDefinition();
4491     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4492     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4493     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4494     KmpDependInfoRD->completeDefinition();
4495     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4496   }
4497 }
4498 
/// Loads the dependence array referenced by a depobj variable and the number
/// of elements it holds.
///
/// The pointer stored in \p DepobjLVal is reinterpreted as a pointer to
/// kmp_depend_info. The element count is read from the base-address field of
/// the element located one position before the pointed-to element.
/// \param CGF Function in which the code is emitted.
/// \param DepobjLVal Lvalue of the depobj variable.
/// \param Loc Location used for the emitted load of the count.
/// \returns the pair {number of dependences, lvalue of the first element}.
4499 std::pair<llvm::Value *, LValue>
4500 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4501                                    SourceLocation Loc) {
4502   ASTContext &C = CGM.getContext();
4503   QualType FlagsTy;
4504   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4505   RecordDecl *KmpDependInfoRD =
4506       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the pointer held by the depobj variable (treated as void *) and view
  // the pointee as kmp_depend_info.
4507   LValue Base = CGF.EmitLoadOfPointerLValue(
4508       DepobjLVal.getAddress(CGF),
4509       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4510   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4511   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4512           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4513   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4514                             Base.getTBAAInfo());
  // Step back by one element to reach the element that carries the count.
4515   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4516       Addr.getPointer(),
4517       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4518   LValue NumDepsBase = CGF.MakeAddrLValue(
4519       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4520       Base.getBaseInfo(), Base.getTBAAInfo());
4521   // NumDeps = deps[-1].base_addr;
4522   LValue BaseAddrLVal = CGF.EmitLValueForField(
4523       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4524   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4525   return std::make_pair(NumDeps, Base);
4526 }
4527 
/// Fills consecutive kmp_depend_info elements of \p DependenciesArray, one
/// per expression in Data.DepExprs.
///
/// For every expression the base address, the length and the flags (the
/// translated Data.DepKind) are stored. The stores are emitted inside an
/// iterator scope built from Data.IteratorExpr (or from null when there is
/// none).
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy In/out cache of the kmp_depend_info record type.
/// \param Pos Write position: either a pointer to a compile-time index, which
/// is incremented in place, or a pointer to an lvalue holding a run-time
/// counter, which is reloaded, incremented and stored back per element.
/// \param Data Dependence kind, expressions and optional iterator.
/// \param DependenciesArray Address of the first element of the array.
4528 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4529                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4530                            const OMPTaskDataTy::DependData &Data,
4531                            Address DependenciesArray) {
4532   CodeGenModule &CGM = CGF.CGM;
4533   ASTContext &C = CGM.getContext();
4534   QualType FlagsTy;
4535   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4536   RecordDecl *KmpDependInfoRD =
4537       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4538   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4539 
4540   OMPIteratorGeneratorScope IteratorScope(
4541       CGF, cast_or_null<OMPIteratorExpr>(
4542                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4543                                  : nullptr));
4544   for (const Expr *E : Data.DepExprs) {
4545     llvm::Value *Addr;
4546     llvm::Value *Size;
4547     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Address the destination element, with a constant or a run-time index
    // depending on the kind of Pos.
4548     LValue Base;
4549     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4550       Base = CGF.MakeAddrLValue(
4551           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4552     } else {
4553       LValue &PosLVal = *Pos.get<LValue *>();
4554       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4555       Base = CGF.MakeAddrLValue(
4556           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4557                   DependenciesArray.getAlignment()),
4558           KmpDependInfoTy);
4559     }
4560     // deps[i].base_addr = &<Dependencies[i].second>;
4561     LValue BaseAddrLVal = CGF.EmitLValueForField(
4562         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4563     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4564                           BaseAddrLVal);
4565     // deps[i].len = sizeof(<Dependencies[i].second>);
4566     LValue LenLVal = CGF.EmitLValueForField(
4567         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4568     CGF.EmitStoreOfScalar(Size, LenLVal);
4569     // deps[i].flags = <Dependencies[i].first>;
4570     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4571     LValue FlagsLVal = CGF.EmitLValueForField(
4572         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4573     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4574                           FlagsLVal);
    // Advance the write position by one element.
4575     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4576       ++(*P);
4577     } else {
4578       LValue &PosLVal = *Pos.get<LValue *>();
4579       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4580       Idx = CGF.Builder.CreateNUWAdd(Idx,
4581                                      llvm::ConstantInt::get(Idx->getType(), 1));
4582       CGF.EmitStoreOfScalar(Idx, PosLVal);
4583     }
4584   }
4585 }
4586 
/// Emits code computing, for each depobj expression in Data.DepExprs, the
/// number of dependence elements it contributes.
///
/// Each expression gets its own temporary ("depobj.size.addr") to which the
/// element count read from the depobj is added inside the iterator scope; the
/// temporaries are loaded after the scope has been closed.
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy In/out cache of the kmp_depend_info record type.
/// \param Data Dependence data; its kind must be OMPC_DEPEND_depobj.
/// \returns one loaded size value per expression, in expression order.
4587 static SmallVector<llvm::Value *, 4>
4588 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4589                         const OMPTaskDataTy::DependData &Data) {
4590   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4591          "Expected depobj dependecy kind.");
4592   SmallVector<llvm::Value *, 4> Sizes;
4593   SmallVector<LValue, 4> SizeLVals;
4594   ASTContext &C = CGF.getContext();
4595   QualType FlagsTy;
4596   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4597   RecordDecl *KmpDependInfoRD =
4598       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4599   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4600   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4601   {
    // The braces end the iterator scope before the accumulated sizes are
    // loaded below.
4602     OMPIteratorGeneratorScope IteratorScope(
4603         CGF, cast_or_null<OMPIteratorExpr>(
4604                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4605                                    : nullptr));
4606     for (const Expr *E : Data.DepExprs) {
4607       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4608       LValue Base = CGF.EmitLoadOfPointerLValue(
4609           DepobjLVal.getAddress(CGF),
4610           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4611       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4612           Base.getAddress(CGF), KmpDependInfoPtrT);
4613       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4614                                 Base.getTBAAInfo());
      // The element count lives in the element just before the pointed-to one.
4615       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4616           Addr.getPointer(),
4617           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4618       LValue NumDepsBase = CGF.MakeAddrLValue(
4619           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4620           Base.getBaseInfo(), Base.getTBAAInfo());
4621       // NumDeps = deps[-1].base_addr;
4622       LValue BaseAddrLVal = CGF.EmitLValueForField(
4623           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4624       llvm::Value *NumDeps =
4625           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a zero-initialized temporary.
      // NOTE(review): InitTempAlloca presumably places the zero store next to
      // the alloca rather than at the current insertion point, so that the sum
      // survives the iterations of the iterator loop - confirm.
4626       LValue NumLVal = CGF.MakeAddrLValue(
4627           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4628           C.getUIntPtrType());
4629       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4630                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4631       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4632       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4633       CGF.EmitStoreOfScalar(Add, NumLVal);
4634       SizeLVals.push_back(NumLVal);
4635     }
4636   }
  // Load the final value of every accumulator.
4637   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4638     llvm::Value *Size =
4639         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4640     Sizes.push_back(Size);
4641   }
4642   return Sizes;
4643 }
4644 
/// Copies the dependence elements of every depobj expression in
/// Data.DepExprs into \p DependenciesArray.
///
/// For each expression the element count is read from the depobj, that many
/// kmp_depend_info elements are copied with a memcpy to the position held in
/// \p PosLVal, and the position is advanced by the element count.
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy In/out cache of the kmp_depend_info record type.
/// \param PosLVal Lvalue of the run-time write position, in elements.
/// \param Data Dependence data; its kind must be OMPC_DEPEND_depobj.
/// \param DependenciesArray Address of the first element of the destination.
4645 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4646                                LValue PosLVal,
4647                                const OMPTaskDataTy::DependData &Data,
4648                                Address DependenciesArray) {
4649   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4650          "Expected depobj dependecy kind.");
4651   ASTContext &C = CGF.getContext();
4652   QualType FlagsTy;
4653   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4654   RecordDecl *KmpDependInfoRD =
4655       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4656   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4657   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4658   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4659   {
4660     OMPIteratorGeneratorScope IteratorScope(
4661         CGF, cast_or_null<OMPIteratorExpr>(
4662                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4663                                    : nullptr));
4664     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4665       const Expr *E = Data.DepExprs[I];
4666       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4667       LValue Base = CGF.EmitLoadOfPointerLValue(
4668           DepobjLVal.getAddress(CGF),
4669           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4670       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4671           Base.getAddress(CGF), KmpDependInfoPtrT);
4672       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4673                                 Base.getTBAAInfo());
4674 
4675       // Get number of elements in a single depobj.
4676       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4677           Addr.getPointer(),
4678           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4679       LValue NumDepsBase = CGF.MakeAddrLValue(
4680           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4681           Base.getBaseInfo(), Base.getTBAAInfo());
4682       // NumDeps = deps[-1].base_addr;
4683       LValue BaseAddrLVal = CGF.EmitLValueForField(
4684           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4685       llvm::Value *NumDeps =
4686           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4687 
4688       // Copy NumDeps * sizeof(kmp_depend_info) bytes of dependency data to
      // the current position in the destination array.
4689       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4690           ElSize,
4691           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4692       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4693       Address DepAddr =
4694           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4695                   DependenciesArray.getAlignment());
4696       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4697 
4698       // Increase pos.
4699       // pos += NumDeps (a count of elements, not of bytes);
4700       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4701       CGF.EmitStoreOfScalar(Add, PosLVal);
4702     }
4703   }
4704 }
4705 
4706 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4707     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4708     SourceLocation Loc) {
4709   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4710         return D.DepExprs.empty();
4711       }))
4712     return std::make_pair(nullptr, Address::invalid());
4713   // Process list of dependencies.
4714   ASTContext &C = CGM.getContext();
4715   Address DependenciesArray = Address::invalid();
4716   llvm::Value *NumOfElements = nullptr;
4717   unsigned NumDependencies = std::accumulate(
4718       Dependencies.begin(), Dependencies.end(), 0,
4719       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4720         return D.DepKind == OMPC_DEPEND_depobj
4721                    ? V
4722                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4723       });
4724   QualType FlagsTy;
4725   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4726   bool HasDepobjDeps = false;
4727   bool HasRegularWithIterators = false;
4728   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4729   llvm::Value *NumOfRegularWithIterators =
4730       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4731   // Calculate number of depobj dependecies and regular deps with the iterators.
4732   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4733     if (D.DepKind == OMPC_DEPEND_depobj) {
4734       SmallVector<llvm::Value *, 4> Sizes =
4735           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4736       for (llvm::Value *Size : Sizes) {
4737         NumOfDepobjElements =
4738             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4739       }
4740       HasDepobjDeps = true;
4741       continue;
4742     }
4743     // Include number of iterations, if any.
4744     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4745       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4746         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4747         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4748         NumOfRegularWithIterators =
4749             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4750       }
4751       HasRegularWithIterators = true;
4752       continue;
4753     }
4754   }
4755 
4756   QualType KmpDependInfoArrayTy;
4757   if (HasDepobjDeps || HasRegularWithIterators) {
4758     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4759                                            /*isSigned=*/false);
4760     if (HasDepobjDeps) {
4761       NumOfElements =
4762           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4763     }
4764     if (HasRegularWithIterators) {
4765       NumOfElements =
4766           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4767     }
4768     OpaqueValueExpr OVE(Loc,
4769                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4770                         VK_RValue);
4771     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4772                                                   RValue::get(NumOfElements));
4773     KmpDependInfoArrayTy =
4774         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4775                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4776     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4777     // Properly emit variable-sized array.
4778     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4779                                          ImplicitParamDecl::Other);
4780     CGF.EmitVarDecl(*PD);
4781     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4782     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4783                                               /*isSigned=*/false);
4784   } else {
4785     KmpDependInfoArrayTy = C.getConstantArrayType(
4786         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4787         ArrayType::Normal, /*IndexTypeQuals=*/0);
4788     DependenciesArray =
4789         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4790     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4791     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4792                                            /*isSigned=*/false);
4793   }
4794   unsigned Pos = 0;
4795   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4796     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4797         Dependencies[I].IteratorExpr)
4798       continue;
4799     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4800                    DependenciesArray);
4801   }
4802   // Copy regular dependecies with iterators.
4803   LValue PosLVal = CGF.MakeAddrLValue(
4804       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4805   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4806   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4807     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4808         !Dependencies[I].IteratorExpr)
4809       continue;
4810     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4811                    DependenciesArray);
4812   }
4813   // Copy final depobj arrays without iterators.
4814   if (HasDepobjDeps) {
4815     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4816       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4817         continue;
4818       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4819                          DependenciesArray);
4820     }
4821   }
4822   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4823       DependenciesArray, CGF.VoidPtrTy);
4824   return std::make_pair(NumOfElements, DependenciesArray);
4825 }
4826 
4827 Address CGOpenMPRuntime::emitDepobjDependClause(
4828     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4829     SourceLocation Loc) {
4830   if (Dependencies.DepExprs.empty())
4831     return Address::invalid();
4832   // Process list of dependencies.
4833   ASTContext &C = CGM.getContext();
4834   Address DependenciesArray = Address::invalid();
4835   unsigned NumDependencies = Dependencies.DepExprs.size();
4836   QualType FlagsTy;
4837   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4838   RecordDecl *KmpDependInfoRD =
4839       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4840 
4841   llvm::Value *Size;
4842   // Define type kmp_depend_info[<Dependencies.size()>];
4843   // For depobj reserve one extra element to store the number of elements.
4844   // It is required to handle depobj(x) update(in) construct.
4845   // kmp_depend_info[<Dependencies.size()>] deps;
4846   llvm::Value *NumDepsVal;
4847   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4848   if (const auto *IE =
4849           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4850     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4851     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4852       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4853       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4854       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4855     }
4856     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4857                                     NumDepsVal);
4858     CharUnits SizeInBytes =
4859         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4860     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4861     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4862     NumDepsVal =
4863         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4864   } else {
4865     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4866         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4867         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4868     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4869     Size = CGM.getSize(Sz.alignTo(Align));
4870     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4871   }
4872   // Need to allocate on the dynamic memory.
4873   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4874   // Use default allocator.
4875   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4876   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4877 
4878   llvm::Value *Addr =
4879       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4880                               CGM.getModule(), OMPRTL___kmpc_alloc),
4881                           Args, ".dep.arr.addr");
4882   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4883       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4884   DependenciesArray = Address(Addr, Align);
4885   // Write number of elements in the first element of array for depobj.
4886   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4887   // deps[i].base_addr = NumDependencies;
4888   LValue BaseAddrLVal = CGF.EmitLValueForField(
4889       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4890   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4891   llvm::PointerUnion<unsigned *, LValue *> Pos;
4892   unsigned Idx = 1;
4893   LValue PosLVal;
4894   if (Dependencies.IteratorExpr) {
4895     PosLVal = CGF.MakeAddrLValue(
4896         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4897         C.getSizeType());
4898     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4899                           /*IsInit=*/true);
4900     Pos = &PosLVal;
4901   } else {
4902     Pos = &Idx;
4903   }
4904   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4905   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4906       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4907   return DependenciesArray;
4908 }
4909 
4910 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4911                                         SourceLocation Loc) {
4912   ASTContext &C = CGM.getContext();
4913   QualType FlagsTy;
4914   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4915   LValue Base = CGF.EmitLoadOfPointerLValue(
4916       DepobjLVal.getAddress(CGF),
4917       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4918   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4919   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4920       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4921   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4922       Addr.getPointer(),
4923       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4924   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4925                                                                CGF.VoidPtrTy);
4926   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4927   // Use default allocator.
4928   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4929   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4930 
4931   // _kmpc_free(gtid, addr, nullptr);
4932   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4933                                 CGM.getModule(), OMPRTL___kmpc_free),
4934                             Args);
4935 }
4936 
4937 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4938                                        OpenMPDependClauseKind NewDepKind,
4939                                        SourceLocation Loc) {
4940   ASTContext &C = CGM.getContext();
4941   QualType FlagsTy;
4942   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4943   RecordDecl *KmpDependInfoRD =
4944       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4945   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4946   llvm::Value *NumDeps;
4947   LValue Base;
4948   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4949 
4950   Address Begin = Base.getAddress(CGF);
4951   // Cast from pointer to array type to pointer to single element.
4952   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
4953   // The basic structure here is a while-do loop.
4954   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4955   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4956   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4957   CGF.EmitBlock(BodyBB);
4958   llvm::PHINode *ElementPHI =
4959       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4960   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4961   Begin = Address(ElementPHI, Begin.getAlignment());
4962   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4963                             Base.getTBAAInfo());
4964   // deps[i].flags = NewDepKind;
4965   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4966   LValue FlagsLVal = CGF.EmitLValueForField(
4967       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4968   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4969                         FlagsLVal);
4970 
4971   // Shift the address forward by one element.
4972   Address ElementNext =
4973       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4974   ElementPHI->addIncoming(ElementNext.getPointer(),
4975                           CGF.Builder.GetInsertBlock());
4976   llvm::Value *IsEmpty =
4977       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4978   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4979   // Done.
4980   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4981 }
4982 
4983 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4984                                    const OMPExecutableDirective &D,
4985                                    llvm::Function *TaskFunction,
4986                                    QualType SharedsTy, Address Shareds,
4987                                    const Expr *IfCond,
4988                                    const OMPTaskDataTy &Data) {
4989   if (!CGF.HaveInsertPoint())
4990     return;
4991 
4992   TaskResultTy Result =
4993       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4994   llvm::Value *NewTask = Result.NewTask;
4995   llvm::Function *TaskEntry = Result.TaskEntry;
4996   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4997   LValue TDBase = Result.TDBase;
4998   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4999   // Process list of dependences.
5000   Address DependenciesArray = Address::invalid();
5001   llvm::Value *NumOfElements;
5002   std::tie(NumOfElements, DependenciesArray) =
5003       emitDependClause(CGF, Data.Dependences, Loc);
5004 
5005   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5006   // libcall.
5007   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5008   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5009   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5010   // list is not empty
5011   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5012   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5013   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5014   llvm::Value *DepTaskArgs[7];
5015   if (!Data.Dependences.empty()) {
5016     DepTaskArgs[0] = UpLoc;
5017     DepTaskArgs[1] = ThreadID;
5018     DepTaskArgs[2] = NewTask;
5019     DepTaskArgs[3] = NumOfElements;
5020     DepTaskArgs[4] = DependenciesArray.getPointer();
5021     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5022     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5023   }
5024   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5025                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5026     if (!Data.Tied) {
5027       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5028       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5029       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5030     }
5031     if (!Data.Dependences.empty()) {
5032       CGF.EmitRuntimeCall(
5033           OMPBuilder.getOrCreateRuntimeFunction(
5034               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5035           DepTaskArgs);
5036     } else {
5037       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5038                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5039                           TaskArgs);
5040     }
5041     // Check if parent region is untied and build return for untied task;
5042     if (auto *Region =
5043             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5044       Region->emitUntiedSwitch(CGF);
5045   };
5046 
5047   llvm::Value *DepWaitTaskArgs[6];
5048   if (!Data.Dependences.empty()) {
5049     DepWaitTaskArgs[0] = UpLoc;
5050     DepWaitTaskArgs[1] = ThreadID;
5051     DepWaitTaskArgs[2] = NumOfElements;
5052     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5053     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5054     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5055   }
5056   auto &M = CGM.getModule();
5057   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5058                         TaskEntry, &Data, &DepWaitTaskArgs,
5059                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5060     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5061     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5062     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5063     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5064     // is specified.
5065     if (!Data.Dependences.empty())
5066       CGF.EmitRuntimeCall(
5067           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5068           DepWaitTaskArgs);
5069     // Call proxy_task_entry(gtid, new_task);
5070     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5071                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5072       Action.Enter(CGF);
5073       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5074       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5075                                                           OutlinedFnArgs);
5076     };
5077 
5078     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5079     // kmp_task_t *new_task);
5080     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5081     // kmp_task_t *new_task);
5082     RegionCodeGenTy RCG(CodeGen);
5083     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5084                               M, OMPRTL___kmpc_omp_task_begin_if0),
5085                           TaskArgs,
5086                           OMPBuilder.getOrCreateRuntimeFunction(
5087                               M, OMPRTL___kmpc_omp_task_complete_if0),
5088                           TaskArgs);
5089     RCG.setAction(Action);
5090     RCG(CGF);
5091   };
5092 
5093   if (IfCond) {
5094     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5095   } else {
5096     RegionCodeGenTy ThenRCG(ThenCodeGen);
5097     ThenRCG(CGF);
5098   }
5099 }
5100 
5101 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5102                                        const OMPLoopDirective &D,
5103                                        llvm::Function *TaskFunction,
5104                                        QualType SharedsTy, Address Shareds,
5105                                        const Expr *IfCond,
5106                                        const OMPTaskDataTy &Data) {
5107   if (!CGF.HaveInsertPoint())
5108     return;
5109   TaskResultTy Result =
5110       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5111   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5112   // libcall.
5113   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5114   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5115   // sched, kmp_uint64 grainsize, void *task_dup);
5116   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5117   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5118   llvm::Value *IfVal;
5119   if (IfCond) {
5120     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5121                                       /*isSigned=*/true);
5122   } else {
5123     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5124   }
5125 
5126   LValue LBLVal = CGF.EmitLValueForField(
5127       Result.TDBase,
5128       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5129   const auto *LBVar =
5130       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5131   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5132                        LBLVal.getQuals(),
5133                        /*IsInitializer=*/true);
5134   LValue UBLVal = CGF.EmitLValueForField(
5135       Result.TDBase,
5136       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5137   const auto *UBVar =
5138       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5139   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5140                        UBLVal.getQuals(),
5141                        /*IsInitializer=*/true);
5142   LValue StLVal = CGF.EmitLValueForField(
5143       Result.TDBase,
5144       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5145   const auto *StVar =
5146       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5147   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5148                        StLVal.getQuals(),
5149                        /*IsInitializer=*/true);
5150   // Store reductions address.
5151   LValue RedLVal = CGF.EmitLValueForField(
5152       Result.TDBase,
5153       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5154   if (Data.Reductions) {
5155     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5156   } else {
5157     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5158                                CGF.getContext().VoidPtrTy);
5159   }
5160   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5161   llvm::Value *TaskArgs[] = {
5162       UpLoc,
5163       ThreadID,
5164       Result.NewTask,
5165       IfVal,
5166       LBLVal.getPointer(CGF),
5167       UBLVal.getPointer(CGF),
5168       CGF.EmitLoadOfScalar(StLVal, Loc),
5169       llvm::ConstantInt::getSigned(
5170           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5171       llvm::ConstantInt::getSigned(
5172           CGF.IntTy, Data.Schedule.getPointer()
5173                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5174                          : NoSchedule),
5175       Data.Schedule.getPointer()
5176           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5177                                       /*isSigned=*/false)
5178           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5179       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5180                              Result.TaskDupFn, CGF.VoidPtrTy)
5181                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5182   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5183                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5184                       TaskArgs);
5185 }
5186 
5187 /// Emit reduction operation for each element of array (required for
5188 /// array sections) LHS op = RHS.
5189 /// \param Type Type of array.
5190 /// \param LHSVar Variable on the left side of the reduction operation
5191 /// (references element of array in original variable).
5192 /// \param RHSVar Variable on the right side of the reduction operation
5193 /// (references element of array in original variable).
5194 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5195 /// RHSVar.
5196 static void EmitOMPAggregateReduction(
5197     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5198     const VarDecl *RHSVar,
5199     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5200                                   const Expr *, const Expr *)> &RedOpGen,
5201     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5202     const Expr *UpExpr = nullptr) {
5203   // Perform element-by-element initialization.
5204   QualType ElementTy;
5205   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5206   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5207 
5208   // Drill down to the base element type on both arrays.
5209   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5210   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5211 
5212   llvm::Value *RHSBegin = RHSAddr.getPointer();
5213   llvm::Value *LHSBegin = LHSAddr.getPointer();
5214   // Cast from pointer to array type to pointer to single element.
5215   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5216   // The basic structure here is a while-do loop.
5217   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5218   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5219   llvm::Value *IsEmpty =
5220       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5221   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5222 
5223   // Enter the loop body, making that address the current address.
5224   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5225   CGF.EmitBlock(BodyBB);
5226 
5227   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5228 
5229   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5230       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5231   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5232   Address RHSElementCurrent =
5233       Address(RHSElementPHI,
5234               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5235 
5236   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5237       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5238   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5239   Address LHSElementCurrent =
5240       Address(LHSElementPHI,
5241               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5242 
5243   // Emit copy.
5244   CodeGenFunction::OMPPrivateScope Scope(CGF);
5245   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5246   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5247   Scope.Privatize();
5248   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5249   Scope.ForceCleanup();
5250 
5251   // Shift the address forward by one element.
5252   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5253       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5254   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5255       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5256   // Check whether we've reached the end.
5257   llvm::Value *Done =
5258       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5259   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5260   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5261   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5262 
5263   // Done.
5264   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5265 }
5266 
5267 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5268 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5269 /// UDR combiner function.
5270 static void emitReductionCombiner(CodeGenFunction &CGF,
5271                                   const Expr *ReductionOp) {
5272   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5273     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5274       if (const auto *DRE =
5275               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5276         if (const auto *DRD =
5277                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5278           std::pair<llvm::Function *, llvm::Function *> Reduction =
5279               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5280           RValue Func = RValue::get(Reduction.first);
5281           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5282           CGF.EmitIgnoredExpr(ReductionOp);
5283           return;
5284         }
5285   CGF.EmitIgnoredExpr(ReductionOp);
5286 }
5287 
5288 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5289     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5290     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5291     ArrayRef<const Expr *> ReductionOps) {
5292   ASTContext &C = CGM.getContext();
5293 
5294   // void reduction_func(void *LHSArg, void *RHSArg);
5295   FunctionArgList Args;
5296   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5297                            ImplicitParamDecl::Other);
5298   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5299                            ImplicitParamDecl::Other);
5300   Args.push_back(&LHSArg);
5301   Args.push_back(&RHSArg);
5302   const auto &CGFI =
5303       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5304   std::string Name = getName({"omp", "reduction", "reduction_func"});
5305   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5306                                     llvm::GlobalValue::InternalLinkage, Name,
5307                                     &CGM.getModule());
5308   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5309   Fn->setDoesNotRecurse();
5310   CodeGenFunction CGF(CGM);
5311   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5312 
5313   // Dst = (void*[n])(LHSArg);
5314   // Src = (void*[n])(RHSArg);
5315   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5316       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5317       ArgsType), CGF.getPointerAlign());
5318   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5319       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5320       ArgsType), CGF.getPointerAlign());
5321 
5322   //  ...
5323   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5324   //  ...
5325   CodeGenFunction::OMPPrivateScope Scope(CGF);
5326   auto IPriv = Privates.begin();
5327   unsigned Idx = 0;
5328   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5329     const auto *RHSVar =
5330         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5331     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5332       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5333     });
5334     const auto *LHSVar =
5335         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5336     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5337       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5338     });
5339     QualType PrivTy = (*IPriv)->getType();
5340     if (PrivTy->isVariablyModifiedType()) {
5341       // Get array size and emit VLA type.
5342       ++Idx;
5343       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5344       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5345       const VariableArrayType *VLA =
5346           CGF.getContext().getAsVariableArrayType(PrivTy);
5347       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5348       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5349           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5350       CGF.EmitVariablyModifiedType(PrivTy);
5351     }
5352   }
5353   Scope.Privatize();
5354   IPriv = Privates.begin();
5355   auto ILHS = LHSExprs.begin();
5356   auto IRHS = RHSExprs.begin();
5357   for (const Expr *E : ReductionOps) {
5358     if ((*IPriv)->getType()->isArrayType()) {
5359       // Emit reduction for array section.
5360       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5361       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5362       EmitOMPAggregateReduction(
5363           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5364           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5365             emitReductionCombiner(CGF, E);
5366           });
5367     } else {
5368       // Emit reduction for array subscript or single variable.
5369       emitReductionCombiner(CGF, E);
5370     }
5371     ++IPriv;
5372     ++ILHS;
5373     ++IRHS;
5374   }
5375   Scope.ForceCleanup();
5376   CGF.FinishFunction();
5377   return Fn;
5378 }
5379 
5380 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5381                                                   const Expr *ReductionOp,
5382                                                   const Expr *PrivateRef,
5383                                                   const DeclRefExpr *LHS,
5384                                                   const DeclRefExpr *RHS) {
5385   if (PrivateRef->getType()->isArrayType()) {
5386     // Emit reduction for array section.
5387     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5388     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5389     EmitOMPAggregateReduction(
5390         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5391         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5392           emitReductionCombiner(CGF, ReductionOp);
5393         });
5394   } else {
5395     // Emit reduction for array subscript or single variable.
5396     emitReductionCombiner(CGF, ReductionOp);
5397   }
5398 }
5399 
/// Emits the code that combines the reduction items described by the four
/// parallel lists \p Privates, \p LHSExprs, \p RHSExprs and \p ReductionOps
/// (one entry per reduction item).
///
/// Nothing is emitted when \p CGF has no insert point. With
/// Options.SimpleReduction only the inline combiners are emitted. Otherwise a
/// list of pointers to the RHS items is built, a reduce function is generated,
/// __kmpc_reduce{_nowait} is called and its result is dispatched through a
/// switch: case 1 combines the items inline, case 2 combines them atomically
/// (or in a critical region when no simple atomic form is recognized).
/// Options.WithNowait selects the *_nowait runtime entry points.
///
/// LHSExprs/RHSExprs entries are cast to DeclRefExpr when the combiners are
/// emitted, so they must be plain references to variables.
5400 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5401                                     ArrayRef<const Expr *> Privates,
5402                                     ArrayRef<const Expr *> LHSExprs,
5403                                     ArrayRef<const Expr *> RHSExprs,
5404                                     ArrayRef<const Expr *> ReductionOps,
5405                                     ReductionOptionsTy Options) {
5406   if (!CGF.HaveInsertPoint())
5407     return;
5408 
5409   bool WithNowait = Options.WithNowait;
5410   bool SimpleReduction = Options.SimpleReduction;
5411 
5412   // Next code should be emitted for reduction:
5413   //
5414   // static kmp_critical_name lock = { 0 };
5415   //
5416   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5417   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5418   //  ...
5419   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5420   //  *(Type<n>-1*)rhs[<n>-1]);
5421   // }
5422   //
5423   // ...
5424   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5425   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5426   // RedList, reduce_func, &<lock>)) {
5427   // case 1:
5428   //  ...
5429   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5430   //  ...
5431   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5432   // break;
5433   // case 2:
5434   //  ...
5435   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5436   //  ...
5437   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5438   // break;
5439   // default:;
5440   // }
5441   //
5442   // if SimpleReduction is true, only the next code is generated:
5443   //  ...
5444   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5445   //  ...
5446 
5447   ASTContext &C = CGM.getContext();
5448 
       // Simple form: combine every item inline and return. No reduction list,
       // runtime call or switch is emitted on this path.
5449   if (SimpleReduction) {
5450     CodeGenFunction::RunCleanupsScope Scope(CGF);
5451     auto IPriv = Privates.begin();
5452     auto ILHS = LHSExprs.begin();
5453     auto IRHS = RHSExprs.begin();
5454     for (const Expr *E : ReductionOps) {
5455       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5456                                   cast<DeclRefExpr>(*IRHS));
5457       ++IPriv;
5458       ++ILHS;
5459       ++IRHS;
5460     }
5461     return;
5462   }
5463 
5464   // 1. Build a list of reduction variables.
5465   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
       // Size is the number of slots in the list: one per reduction item plus one
       // extra slot for every variably modified private type.
5466   auto Size = RHSExprs.size();
5467   for (const Expr *E : Privates) {
5468     if (E->getType()->isVariablyModifiedType())
5469       // Reserve place for array size.
5470       ++Size;
5471   }
5472   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5473   QualType ReductionArrayTy =
5474       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5475                              /*IndexTypeQuals=*/0);
5476   Address ReductionList =
5477       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5478   auto IPriv = Privates.begin();
       // I indexes the reduction items, Idx the slots of the list; Idx advances
       // an extra step whenever a size slot is written.
5479   unsigned Idx = 0;
5480   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5481     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5482     CGF.Builder.CreateStore(
5483         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5484             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5485         Elem);
5486     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5487       // Store array size.
5488       ++Idx;
5489       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
           // This local shadows the slot count `Size` declared above. The element
           // count is stored in the list encoded as a pointer value.
5490       llvm::Value *Size = CGF.Builder.CreateIntCast(
5491           CGF.getVLASize(
5492                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5493               .NumElts,
5494           CGF.SizeTy, /*isSigned=*/false);
5495       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5496                               Elem);
5497     }
5498   }
5499 
5500   // 2. Emit reduce_func().
5501   llvm::Function *ReductionFn = emitReductionFunction(
5502       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5503       LHSExprs, RHSExprs, ReductionOps);
5504 
5505   // 3. Create static kmp_critical_name lock = { 0 };
5506   std::string Name = getName({"reduction"});
5507   llvm::Value *Lock = getCriticalRegionLock(Name);
5508 
5509   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5510   // RedList, reduce_func, &<lock>);
5511   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5512   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5513   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5514   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5515       ReductionList.getPointer(), CGF.VoidPtrTy);
       // NOTE(review): <n> is the number of reduction items (RHSExprs.size()),
       // while RedList and sizeof(RedList) also cover the extra VLA size slots
       // reserved in step 1 -- confirm this matches what the runtime expects.
5516   llvm::Value *Args[] = {
5517       IdentTLoc,                             // ident_t *<loc>
5518       ThreadId,                              // i32 <gtid>
5519       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5520       ReductionArrayTySize,                  // size_type sizeof(RedList)
5521       RL,                                    // void *RedList
5522       ReductionFn, // void (*) (void *, void *) <reduce_func>
5523       Lock         // kmp_critical_name *&<lock>
5524   };
5525   llvm::Value *Res = CGF.EmitRuntimeCall(
5526       OMPBuilder.getOrCreateRuntimeFunction(
5527           CGM.getModule(),
5528           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5529       Args);
5530 
5531   // 5. Build switch(res)
       // Any result other than 1 or 2 branches straight to the default block.
5532   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5533   llvm::SwitchInst *SwInst =
5534       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5535 
5536   // 6. Build case 1:
5537   //  ...
5538   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5539   //  ...
5540   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5541   // break;
5542   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5543   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5544   CGF.EmitBlock(Case1BB);
5545 
5546   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5547   llvm::Value *EndArgs[] = {
5548       IdentTLoc, // ident_t *<loc>
5549       ThreadId,  // i32 <gtid>
5550       Lock       // kmp_critical_name *&<lock>
5551   };
       // Region body for case 1: the same inline combiners as the simple form.
5552   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5553                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5554     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5555     auto IPriv = Privates.begin();
5556     auto ILHS = LHSExprs.begin();
5557     auto IRHS = RHSExprs.begin();
5558     for (const Expr *E : ReductionOps) {
5559       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5560                                      cast<DeclRefExpr>(*IRHS));
5561       ++IPriv;
5562       ++ILHS;
5563       ++IRHS;
5564     }
5565   };
5566   RegionCodeGenTy RCG(CodeGen);
       // The first callee/argument pair is empty (nullptr, llvm::None); only the
       // end-reduce function and EndArgs are supplied. Presumably this means no
       // call on entry and the end-reduce call on exit -- see CommonActionTy.
5567   CommonActionTy Action(
5568       nullptr, llvm::None,
5569       OMPBuilder.getOrCreateRuntimeFunction(
5570           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5571                                       : OMPRTL___kmpc_end_reduce),
5572       EndArgs);
5573   RCG.setAction(Action);
5574   RCG(CGF);
5575 
5576   CGF.EmitBranch(DefaultBB);
5577 
5578   // 7. Build case 2:
5579   //  ...
5580   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5581   //  ...
5582   // break;
5583   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5584   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5585   CGF.EmitBlock(Case2BB);
5586 
5587   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5588                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5589     auto ILHS = LHSExprs.begin();
5590     auto IRHS = RHSExprs.begin();
5591     auto IPriv = Privates.begin();
5592     for (const Expr *E : ReductionOps) {
           // Decompose a reduction op of the form `x = <update>`:
           //   XExpr  - the assigned-to expression x,
           //   UpExpr - the whole right-hand side,
           //   EExpr  - right operand of the binary operator found in UpExpr
           //            (or in its condition, for a conditional operator),
           //   BO     - opcode of that binary operator.
5593       const Expr *XExpr = nullptr;
5594       const Expr *EExpr = nullptr;
5595       const Expr *UpExpr = nullptr;
           // BO_Comma is the initial value; it is replaced below only when a
           // binary operator is found. The `BO` declared in the next `if` shadows
           // this variable inside that statement only.
5596       BinaryOperatorKind BO = BO_Comma;
5597       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5598         if (BO->getOpcode() == BO_Assign) {
5599           XExpr = BO->getLHS();
5600           UpExpr = BO->getRHS();
5601         }
5602       }
5603       // Try to emit update expression as a simple atomic.
5604       const Expr *RHSExpr = UpExpr;
5605       if (RHSExpr) {
5606         // Analyze RHS part of the whole expression.
5607         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5608                 RHSExpr->IgnoreParenImpCasts())) {
5609           // If this is a conditional operator, analyze its condition for
5610           // min/max reduction operator.
5611           RHSExpr = ACO->getCond();
5612         }
5613         if (const auto *BORHS =
5614                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5615           EExpr = BORHS->getRHS();
5616           BO = BORHS->getOpcode();
5617         }
5618       }
           // XExpr is set only when the reduction op is a plain assignment; every
           // other form is emitted inside a critical region instead.
5619       if (XExpr) {
5620         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5621         auto &&AtomicRedGen = [BO, VD,
5622                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5623                                     const Expr *EExpr, const Expr *UpExpr) {
5624           LValue X = CGF.EmitLValue(XExpr);
5625           RValue E;
5626           if (EExpr)
5627             E = CGF.EmitAnyExpr(EExpr);
               // The callback passed last computes the updated value from a given
               // value of x: it stores that value into a temporary, makes VD refer
               // to the temporary and evaluates UpExpr.
5628           CGF.EmitOMPAtomicSimpleUpdateExpr(
5629               X, E, BO, /*IsXLHSInRHSPart=*/true,
5630               llvm::AtomicOrdering::Monotonic, Loc,
5631               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5632                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5633                 PrivateScope.addPrivate(
5634                     VD, [&CGF, VD, XRValue, Loc]() {
5635                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5636                       CGF.emitOMPSimpleStore(
5637                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5638                           VD->getType().getNonReferenceType(), Loc);
5639                       return LHSTemp;
5640                     });
5641                 (void)PrivateScope.Privatize();
5642                 return CGF.EmitAnyExpr(UpExpr);
5643               });
5644         };
5645         if ((*IPriv)->getType()->isArrayType()) {
5646           // Emit atomic reduction for array section.
5647           const auto *RHSVar =
5648               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5649           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5650                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5651         } else {
5652           // Emit atomic reduction for array subscript or single variable.
5653           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5654         }
5655       } else {
5656         // Emit as a critical region.
             // The three Expr parameters are unused; they are there so the lambda
             // can also be passed to EmitOMPAggregateReduction.
5657         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5658                                            const Expr *, const Expr *) {
5659           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5660           std::string Name = RT.getName({"atomic_reduction"});
5661           RT.emitCriticalRegion(
5662               CGF, Name,
5663               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5664                 Action.Enter(CGF);
5665                 emitReductionCombiner(CGF, E);
5666               },
5667               Loc);
5668         };
5669         if ((*IPriv)->getType()->isArrayType()) {
5670           const auto *LHSVar =
5671               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5672           const auto *RHSVar =
5673               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5674           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5675                                     CritRedGen);
5676         } else {
5677           CritRedGen(CGF, nullptr, nullptr, nullptr);
5678         }
5679       }
5680       ++ILHS;
5681       ++IRHS;
5682       ++IPriv;
5683     }
5684   };
5685   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
       // Only the non-nowait form attaches an end-reduce action to case 2. Action
       // is local to this branch, so the region is emitted here while Action is
       // still in scope (presumably setAction keeps a reference to it).
5686   if (!WithNowait) {
5687     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5688     llvm::Value *EndArgs[] = {
5689         IdentTLoc, // ident_t *<loc>
5690         ThreadId,  // i32 <gtid>
5691         Lock       // kmp_critical_name *&<lock>
5692     };
5693     CommonActionTy Action(nullptr, llvm::None,
5694                           OMPBuilder.getOrCreateRuntimeFunction(
5695                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5696                           EndArgs);
5697     AtomicRCG.setAction(Action);
5698     AtomicRCG(CGF);
5699   } else {
5700     AtomicRCG(CGF);
5701   }
5702 
5703   CGF.EmitBranch(DefaultBB);
5704   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5705 }
5706 
5707 /// Generates unique name for artificial threadprivate variables.
5708 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5709 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5710                                       const Expr *Ref) {
5711   SmallString<256> Buffer;
5712   llvm::raw_svector_ostream Out(Buffer);
5713   const clang::DeclRefExpr *DE;
5714   const VarDecl *D = ::getBaseDecl(Ref, DE);
5715   if (!D)
5716     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5717   D = D->getCanonicalDecl();
5718   std::string Name = CGM.getOpenMPRuntime().getName(
5719       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5720   Out << Prefix << Name << "_"
5721       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5722   return std::string(Out.str());
5723 }
5724 
5725 /// Emits reduction initializer function:
5726 /// \code
5727 /// void @.red_init(void* %arg, void* %orig) {
5728 /// %0 = bitcast void* %arg to <type>*
5729 /// store <type> <init>, <type>* %0
5730 /// ret void
5731 /// }
5732 /// \endcode
5733 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5734                                            SourceLocation Loc,
5735                                            ReductionCodeGen &RCG, unsigned N) {
5736   ASTContext &C = CGM.getContext();
5737   QualType VoidPtrTy = C.VoidPtrTy;
5738   VoidPtrTy.addRestrict();
5739   FunctionArgList Args;
5740   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5741                           ImplicitParamDecl::Other);
5742   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5743                               ImplicitParamDecl::Other);
5744   Args.emplace_back(&Param);
5745   Args.emplace_back(&ParamOrig);
5746   const auto &FnInfo =
5747       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5748   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5749   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5750   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5751                                     Name, &CGM.getModule());
5752   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5753   Fn->setDoesNotRecurse();
5754   CodeGenFunction CGF(CGM);
5755   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5756   Address PrivateAddr = CGF.EmitLoadOfPointer(
5757       CGF.GetAddrOfLocalVar(&Param),
5758       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5759   llvm::Value *Size = nullptr;
5760   // If the size of the reduction item is non-constant, load it from global
5761   // threadprivate variable.
5762   if (RCG.getSizes(N).second) {
5763     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5764         CGF, CGM.getContext().getSizeType(),
5765         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5766     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5767                                 CGM.getContext().getSizeType(), Loc);
5768   }
5769   RCG.emitAggregateType(CGF, N, Size);
5770   LValue OrigLVal;
5771   // If initializer uses initializer from declare reduction construct, emit a
5772   // pointer to the address of the original reduction item (reuired by reduction
5773   // initializer)
5774   if (RCG.usesReductionInitializer(N)) {
5775     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5776     SharedAddr = CGF.EmitLoadOfPointer(
5777         SharedAddr,
5778         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5779     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5780   } else {
5781     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5782         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5783         CGM.getContext().VoidPtrTy);
5784   }
5785   // Emit the initializer:
5786   // %0 = bitcast void* %arg to <type>*
5787   // store <type> <init>, <type>* %0
5788   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5789                          [](CodeGenFunction &) { return false; });
5790   CGF.FinishFunction();
5791   return Fn;
5792 }
5793 
5794 /// Emits reduction combiner function:
5795 /// \code
5796 /// void @.red_comb(void* %arg0, void* %arg1) {
5797 /// %lhs = bitcast void* %arg0 to <type>*
5798 /// %rhs = bitcast void* %arg1 to <type>*
5799 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5800 /// store <type> %2, <type>* %lhs
5801 /// ret void
5802 /// }
5803 /// \endcode
5804 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5805                                            SourceLocation Loc,
5806                                            ReductionCodeGen &RCG, unsigned N,
5807                                            const Expr *ReductionOp,
5808                                            const Expr *LHS, const Expr *RHS,
5809                                            const Expr *PrivateRef) {
5810   ASTContext &C = CGM.getContext();
5811   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5812   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5813   FunctionArgList Args;
5814   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5815                                C.VoidPtrTy, ImplicitParamDecl::Other);
5816   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5817                             ImplicitParamDecl::Other);
5818   Args.emplace_back(&ParamInOut);
5819   Args.emplace_back(&ParamIn);
5820   const auto &FnInfo =
5821       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5822   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5823   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5824   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5825                                     Name, &CGM.getModule());
5826   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5827   Fn->setDoesNotRecurse();
5828   CodeGenFunction CGF(CGM);
5829   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5830   llvm::Value *Size = nullptr;
5831   // If the size of the reduction item is non-constant, load it from global
5832   // threadprivate variable.
5833   if (RCG.getSizes(N).second) {
5834     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5835         CGF, CGM.getContext().getSizeType(),
5836         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5837     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5838                                 CGM.getContext().getSizeType(), Loc);
5839   }
5840   RCG.emitAggregateType(CGF, N, Size);
5841   // Remap lhs and rhs variables to the addresses of the function arguments.
5842   // %lhs = bitcast void* %arg0 to <type>*
5843   // %rhs = bitcast void* %arg1 to <type>*
5844   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5845   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5846     // Pull out the pointer to the variable.
5847     Address PtrAddr = CGF.EmitLoadOfPointer(
5848         CGF.GetAddrOfLocalVar(&ParamInOut),
5849         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5850     return CGF.Builder.CreateElementBitCast(
5851         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5852   });
5853   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5854     // Pull out the pointer to the variable.
5855     Address PtrAddr = CGF.EmitLoadOfPointer(
5856         CGF.GetAddrOfLocalVar(&ParamIn),
5857         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5858     return CGF.Builder.CreateElementBitCast(
5859         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5860   });
5861   PrivateScope.Privatize();
5862   // Emit the combiner body:
5863   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5864   // store <type> %2, <type>* %lhs
5865   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5866       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5867       cast<DeclRefExpr>(RHS));
5868   CGF.FinishFunction();
5869   return Fn;
5870 }
5871 
5872 /// Emits reduction finalizer function:
5873 /// \code
5874 /// void @.red_fini(void* %arg) {
5875 /// %0 = bitcast void* %arg to <type>*
5876 /// <destroy>(<type>* %0)
5877 /// ret void
5878 /// }
5879 /// \endcode
5880 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5881                                            SourceLocation Loc,
5882                                            ReductionCodeGen &RCG, unsigned N) {
5883   if (!RCG.needCleanups(N))
5884     return nullptr;
5885   ASTContext &C = CGM.getContext();
5886   FunctionArgList Args;
5887   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5888                           ImplicitParamDecl::Other);
5889   Args.emplace_back(&Param);
5890   const auto &FnInfo =
5891       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5892   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5893   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5894   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5895                                     Name, &CGM.getModule());
5896   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5897   Fn->setDoesNotRecurse();
5898   CodeGenFunction CGF(CGM);
5899   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5900   Address PrivateAddr = CGF.EmitLoadOfPointer(
5901       CGF.GetAddrOfLocalVar(&Param),
5902       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5903   llvm::Value *Size = nullptr;
5904   // If the size of the reduction item is non-constant, load it from global
5905   // threadprivate variable.
5906   if (RCG.getSizes(N).second) {
5907     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5908         CGF, CGM.getContext().getSizeType(),
5909         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5910     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5911                                 CGM.getContext().getSizeType(), Loc);
5912   }
5913   RCG.emitAggregateType(CGF, N, Size);
5914   // Emit the finalizer body:
5915   // <destroy>(<type>* %0)
5916   RCG.emitCleanups(CGF, N, PrivateAddr);
5917   CGF.FinishFunction(Loc);
5918   return Fn;
5919 }
5920 
5921 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5922     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5923     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5924   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5925     return nullptr;
5926 
5927   // Build typedef struct:
5928   // kmp_taskred_input {
5929   //   void *reduce_shar; // shared reduction item
5930   //   void *reduce_orig; // original reduction item used for initialization
5931   //   size_t reduce_size; // size of data item
5932   //   void *reduce_init; // data initialization routine
5933   //   void *reduce_fini; // data finalization routine
5934   //   void *reduce_comb; // data combiner routine
5935   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5936   // } kmp_taskred_input_t;
5937   ASTContext &C = CGM.getContext();
5938   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5939   RD->startDefinition();
5940   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5941   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5942   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5943   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5944   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5945   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5946   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5947       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5948   RD->completeDefinition();
5949   QualType RDType = C.getRecordType(RD);
5950   unsigned Size = Data.ReductionVars.size();
5951   llvm::APInt ArraySize(/*numBits=*/64, Size);
5952   QualType ArrayRDType = C.getConstantArrayType(
5953       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5954   // kmp_task_red_input_t .rd_input.[Size];
5955   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5956   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5957                        Data.ReductionCopies, Data.ReductionOps);
5958   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5959     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5960     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5961                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5962     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5963         TaskRedInput.getPointer(), Idxs,
5964         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5965         ".rd_input.gep.");
5966     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5967     // ElemLVal.reduce_shar = &Shareds[Cnt];
5968     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5969     RCG.emitSharedOrigLValue(CGF, Cnt);
5970     llvm::Value *CastedShared =
5971         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5972     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5973     // ElemLVal.reduce_orig = &Origs[Cnt];
5974     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5975     llvm::Value *CastedOrig =
5976         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
5977     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
5978     RCG.emitAggregateType(CGF, Cnt);
5979     llvm::Value *SizeValInChars;
5980     llvm::Value *SizeVal;
5981     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5982     // We use delayed creation/initialization for VLAs and array sections. It is
5983     // required because runtime does not provide the way to pass the sizes of
5984     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5985     // threadprivate global variables are used to store these values and use
5986     // them in the functions.
5987     bool DelayedCreation = !!SizeVal;
5988     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5989                                                /*isSigned=*/false);
5990     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5991     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5992     // ElemLVal.reduce_init = init;
5993     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5994     llvm::Value *InitAddr =
5995         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5996     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5997     // ElemLVal.reduce_fini = fini;
5998     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5999     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6000     llvm::Value *FiniAddr = Fini
6001                                 ? CGF.EmitCastToVoidPtr(Fini)
6002                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6003     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6004     // ElemLVal.reduce_comb = comb;
6005     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6006     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6007         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6008         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6009     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6010     // ElemLVal.flags = 0;
6011     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6012     if (DelayedCreation) {
6013       CGF.EmitStoreOfScalar(
6014           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6015           FlagsLVal);
6016     } else
6017       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6018                                  FlagsLVal.getType());
6019   }
6020   if (Data.IsReductionWithTaskMod) {
6021     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6022     // is_ws, int num, void *data);
6023     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6024     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6025                                                   CGM.IntTy, /*isSigned=*/true);
6026     llvm::Value *Args[] = {
6027         IdentTLoc, GTid,
6028         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6029                                /*isSigned=*/true),
6030         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6031         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6032             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6033     return CGF.EmitRuntimeCall(
6034         OMPBuilder.getOrCreateRuntimeFunction(
6035             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6036         Args);
6037   }
6038   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6039   llvm::Value *Args[] = {
6040       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6041                                 /*isSigned=*/true),
6042       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6043       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6044                                                       CGM.VoidPtrTy)};
6045   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6046                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6047                              Args);
6048 }
6049 
6050 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6051                                             SourceLocation Loc,
6052                                             bool IsWorksharingReduction) {
6053   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6054   // is_ws, int num, void *data);
6055   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6056   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6057                                                 CGM.IntTy, /*isSigned=*/true);
6058   llvm::Value *Args[] = {IdentTLoc, GTid,
6059                          llvm::ConstantInt::get(CGM.IntTy,
6060                                                 IsWorksharingReduction ? 1 : 0,
6061                                                 /*isSigned=*/true)};
6062   (void)CGF.EmitRuntimeCall(
6063       OMPBuilder.getOrCreateRuntimeFunction(
6064           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6065       Args);
6066 }
6067 
6068 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6069                                               SourceLocation Loc,
6070                                               ReductionCodeGen &RCG,
6071                                               unsigned N) {
6072   auto Sizes = RCG.getSizes(N);
6073   // Emit threadprivate global variable if the type is non-constant
6074   // (Sizes.second = nullptr).
6075   if (Sizes.second) {
6076     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6077                                                      /*isSigned=*/false);
6078     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6079         CGF, CGM.getContext().getSizeType(),
6080         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6081     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6082   }
6083 }
6084 
6085 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6086                                               SourceLocation Loc,
6087                                               llvm::Value *ReductionsPtr,
6088                                               LValue SharedLVal) {
6089   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6090   // *d);
6091   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6092                                                    CGM.IntTy,
6093                                                    /*isSigned=*/true),
6094                          ReductionsPtr,
6095                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6096                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6097   return Address(
6098       CGF.EmitRuntimeCall(
6099           OMPBuilder.getOrCreateRuntimeFunction(
6100               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6101           Args),
6102       SharedLVal.getAlignment());
6103 }
6104 
6105 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6106                                        SourceLocation Loc) {
6107   if (!CGF.HaveInsertPoint())
6108     return;
6109 
6110   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6111     OMPBuilder.CreateTaskwait(CGF.Builder);
6112   } else {
6113     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6114     // global_tid);
6115     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6116     // Ignore return result until untied tasks are supported.
6117     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6118                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6119                         Args);
6120   }
6121 
6122   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6123     Region->emitUntiedSwitch(CGF);
6124 }
6125 
6126 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6127                                            OpenMPDirectiveKind InnerKind,
6128                                            const RegionCodeGenTy &CodeGen,
6129                                            bool HasCancel) {
6130   if (!CGF.HaveInsertPoint())
6131     return;
6132   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6133   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6134 }
6135 
6136 namespace {
6137 enum RTCancelKind {
6138   CancelNoreq = 0,
6139   CancelParallel = 1,
6140   CancelLoop = 2,
6141   CancelSections = 3,
6142   CancelTaskgroup = 4
6143 };
6144 } // anonymous namespace
6145 
6146 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6147   RTCancelKind CancelKind = CancelNoreq;
6148   if (CancelRegion == OMPD_parallel)
6149     CancelKind = CancelParallel;
6150   else if (CancelRegion == OMPD_for)
6151     CancelKind = CancelLoop;
6152   else if (CancelRegion == OMPD_sections)
6153     CancelKind = CancelSections;
6154   else {
6155     assert(CancelRegion == OMPD_taskgroup);
6156     CancelKind = CancelTaskgroup;
6157   }
6158   return CancelKind;
6159 }
6160 
6161 void CGOpenMPRuntime::emitCancellationPointCall(
6162     CodeGenFunction &CGF, SourceLocation Loc,
6163     OpenMPDirectiveKind CancelRegion) {
6164   if (!CGF.HaveInsertPoint())
6165     return;
6166   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6167   // global_tid, kmp_int32 cncl_kind);
6168   if (auto *OMPRegionInfo =
6169           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6170     // For 'cancellation point taskgroup', the task region info may not have a
6171     // cancel. This may instead happen in another adjacent task.
6172     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6173       llvm::Value *Args[] = {
6174           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6175           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6176       // Ignore return result until untied tasks are supported.
6177       llvm::Value *Result = CGF.EmitRuntimeCall(
6178           OMPBuilder.getOrCreateRuntimeFunction(
6179               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6180           Args);
6181       // if (__kmpc_cancellationpoint()) {
6182       //   exit from construct;
6183       // }
6184       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6185       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6186       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6187       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6188       CGF.EmitBlock(ExitBB);
6189       // exit from construct;
6190       CodeGenFunction::JumpDest CancelDest =
6191           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6192       CGF.EmitBranchThroughCleanup(CancelDest);
6193       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6194     }
6195   }
6196 }
6197 
6198 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6199                                      const Expr *IfCond,
6200                                      OpenMPDirectiveKind CancelRegion) {
6201   if (!CGF.HaveInsertPoint())
6202     return;
6203   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6204   // kmp_int32 cncl_kind);
6205   auto &M = CGM.getModule();
6206   if (auto *OMPRegionInfo =
6207           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6208     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6209                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6210       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6211       llvm::Value *Args[] = {
6212           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6213           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6214       // Ignore return result until untied tasks are supported.
6215       llvm::Value *Result = CGF.EmitRuntimeCall(
6216           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6217       // if (__kmpc_cancel()) {
6218       //   exit from construct;
6219       // }
6220       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6221       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6222       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6223       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6224       CGF.EmitBlock(ExitBB);
6225       // exit from construct;
6226       CodeGenFunction::JumpDest CancelDest =
6227           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6228       CGF.EmitBranchThroughCleanup(CancelDest);
6229       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6230     };
6231     if (IfCond) {
6232       emitIfClause(CGF, IfCond, ThenGen,
6233                    [](CodeGenFunction &, PrePostActionTy &) {});
6234     } else {
6235       RegionCodeGenTy ThenRCG(ThenGen);
6236       ThenRCG(CGF);
6237     }
6238   }
6239 }
6240 
6241 namespace {
6242 /// Cleanup action for uses_allocators support.
6243 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6244   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6245 
6246 public:
6247   OMPUsesAllocatorsActionTy(
6248       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6249       : Allocators(Allocators) {}
6250   void Enter(CodeGenFunction &CGF) override {
6251     if (!CGF.HaveInsertPoint())
6252       return;
6253     for (const auto &AllocatorData : Allocators) {
6254       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6255           CGF, AllocatorData.first, AllocatorData.second);
6256     }
6257   }
6258   void Exit(CodeGenFunction &CGF) override {
6259     if (!CGF.HaveInsertPoint())
6260       return;
6261     for (const auto &AllocatorData : Allocators) {
6262       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6263                                                         AllocatorData.first);
6264     }
6265   }
6266 };
6267 } // namespace
6268 
6269 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6270     const OMPExecutableDirective &D, StringRef ParentName,
6271     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6272     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6273   assert(!ParentName.empty() && "Invalid target region parent name!");
6274   HasEmittedTargetRegion = true;
6275   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6276   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6277     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6278       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6279       if (!D.AllocatorTraits)
6280         continue;
6281       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6282     }
6283   }
6284   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6285   CodeGen.setAction(UsesAllocatorAction);
6286   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6287                                    IsOffloadEntry, CodeGen);
6288 }
6289 
6290 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6291                                              const Expr *Allocator,
6292                                              const Expr *AllocatorTraits) {
6293   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6294   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6295   // Use default memspace handle.
6296   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6297   llvm::Value *NumTraits = llvm::ConstantInt::get(
6298       CGF.IntTy, cast<ConstantArrayType>(
6299                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6300                      ->getSize()
6301                      .getLimitedValue());
6302   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6303   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6304       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6305   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6306                                            AllocatorTraitsLVal.getBaseInfo(),
6307                                            AllocatorTraitsLVal.getTBAAInfo());
6308   llvm::Value *Traits =
6309       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6310 
6311   llvm::Value *AllocatorVal =
6312       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6313                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6314                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6315   // Store to allocator.
6316   CGF.EmitVarDecl(*cast<VarDecl>(
6317       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6318   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6319   AllocatorVal =
6320       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6321                                Allocator->getType(), Allocator->getExprLoc());
6322   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6323 }
6324 
6325 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6326                                              const Expr *Allocator) {
6327   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6328   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6329   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6330   llvm::Value *AllocatorVal =
6331       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6332   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6333                                           CGF.getContext().VoidPtrTy,
6334                                           Allocator->getExprLoc());
6335   (void)CGF.EmitRuntimeCall(
6336       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6337                                             OMPRTL___kmpc_destroy_allocator),
6338       {ThreadId, AllocatorVal});
6339 }
6340 
6341 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6342     const OMPExecutableDirective &D, StringRef ParentName,
6343     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6344     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6345   // Create a unique name for the entry function using the source location
6346   // information of the current target region. The name will be something like:
6347   //
6348   // __omp_offloading_DD_FFFF_PP_lBB
6349   //
6350   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6351   // mangled name of the function that encloses the target region and BB is the
6352   // line number of the target region.
6353 
6354   unsigned DeviceID;
6355   unsigned FileID;
6356   unsigned Line;
6357   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6358                            Line);
6359   SmallString<64> EntryFnName;
6360   {
6361     llvm::raw_svector_ostream OS(EntryFnName);
6362     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6363        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6364   }
6365 
6366   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6367 
6368   CodeGenFunction CGF(CGM, true);
6369   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6370   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6371 
6372   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6373 
6374   // If this target outline function is not an offload entry, we don't need to
6375   // register it.
6376   if (!IsOffloadEntry)
6377     return;
6378 
6379   // The target region ID is used by the runtime library to identify the current
6380   // target region, so it only has to be unique and not necessarily point to
6381   // anything. It could be the pointer to the outlined function that implements
6382   // the target region, but we aren't using that so that the compiler doesn't
6383   // need to keep that, and could therefore inline the host function if proven
6384   // worthwhile during optimization. In the other hand, if emitting code for the
6385   // device, the ID has to be the function address so that it can retrieved from
6386   // the offloading entry and launched by the runtime library. We also mark the
6387   // outlined function to have external linkage in case we are emitting code for
6388   // the device, because these functions will be entry points to the device.
6389 
6390   if (CGM.getLangOpts().OpenMPIsDevice) {
6391     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6392     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6393     OutlinedFn->setDSOLocal(false);
6394   } else {
6395     std::string Name = getName({EntryFnName, "region_id"});
6396     OutlinedFnID = new llvm::GlobalVariable(
6397         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6398         llvm::GlobalValue::WeakAnyLinkage,
6399         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6400   }
6401 
6402   // Register the information for the entry associated with this target region.
6403   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6404       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6405       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6406 }
6407 
6408 /// Checks if the expression is constant or does not have non-trivial function
6409 /// calls.
6410 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6411   // We can skip constant expressions.
6412   // We can skip expressions with trivial calls or simple expressions.
6413   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6414           !E->hasNonTrivialCall(Ctx)) &&
6415          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6416 }
6417 
6418 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6419                                                     const Stmt *Body) {
6420   const Stmt *Child = Body->IgnoreContainers();
6421   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6422     Child = nullptr;
6423     for (const Stmt *S : C->body()) {
6424       if (const auto *E = dyn_cast<Expr>(S)) {
6425         if (isTrivial(Ctx, E))
6426           continue;
6427       }
6428       // Some of the statements can be ignored.
6429       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6430           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6431         continue;
6432       // Analyze declarations.
6433       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6434         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6435               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6436                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6437                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6438                   isa<UsingDirectiveDecl>(D) ||
6439                   isa<OMPDeclareReductionDecl>(D) ||
6440                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6441                 return true;
6442               const auto *VD = dyn_cast<VarDecl>(D);
6443               if (!VD)
6444                 return false;
6445               return VD->isConstexpr() ||
6446                      ((VD->getType().isTrivialType(Ctx) ||
6447                        VD->getType()->isReferenceType()) &&
6448                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6449             }))
6450           continue;
6451       }
6452       // Found multiple children - cannot get the one child only.
6453       if (Child)
6454         return nullptr;
6455       Child = S;
6456     }
6457     if (Child)
6458       Child = Child->IgnoreContainers();
6459   }
6460   return Child;
6461 }
6462 
6463 /// Emit the number of teams for a target directive.  Inspect the num_teams
6464 /// clause associated with a teams construct combined or closely nested
6465 /// with the target directive.
6466 ///
6467 /// Emit a team of size one for directives such as 'target parallel' that
6468 /// have no associated teams construct.
6469 ///
6470 /// Otherwise, return nullptr.
6471 static llvm::Value *
6472 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6473                                const OMPExecutableDirective &D) {
6474   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6475          "Clauses associated with the teams directive expected to be emitted "
6476          "only for the host!");
6477   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6478   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6479          "Expected target-based executable directive.");
6480   CGBuilderTy &Bld = CGF.Builder;
6481   switch (DirectiveKind) {
6482   case OMPD_target: {
6483     const auto *CS = D.getInnermostCapturedStmt();
6484     const auto *Body =
6485         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6486     const Stmt *ChildStmt =
6487         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6488     if (const auto *NestedDir =
6489             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6490       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6491         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6492           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6493           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6494           const Expr *NumTeams =
6495               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6496           llvm::Value *NumTeamsVal =
6497               CGF.EmitScalarExpr(NumTeams,
6498                                  /*IgnoreResultAssign*/ true);
6499           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6500                                    /*isSigned=*/true);
6501         }
6502         return Bld.getInt32(0);
6503       }
6504       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6505           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6506         return Bld.getInt32(1);
6507       return Bld.getInt32(0);
6508     }
6509     return nullptr;
6510   }
6511   case OMPD_target_teams:
6512   case OMPD_target_teams_distribute:
6513   case OMPD_target_teams_distribute_simd:
6514   case OMPD_target_teams_distribute_parallel_for:
6515   case OMPD_target_teams_distribute_parallel_for_simd: {
6516     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6517       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6518       const Expr *NumTeams =
6519           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6520       llvm::Value *NumTeamsVal =
6521           CGF.EmitScalarExpr(NumTeams,
6522                              /*IgnoreResultAssign*/ true);
6523       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6524                                /*isSigned=*/true);
6525     }
6526     return Bld.getInt32(0);
6527   }
6528   case OMPD_target_parallel:
6529   case OMPD_target_parallel_for:
6530   case OMPD_target_parallel_for_simd:
6531   case OMPD_target_simd:
6532     return Bld.getInt32(1);
6533   case OMPD_parallel:
6534   case OMPD_for:
6535   case OMPD_parallel_for:
6536   case OMPD_parallel_master:
6537   case OMPD_parallel_sections:
6538   case OMPD_for_simd:
6539   case OMPD_parallel_for_simd:
6540   case OMPD_cancel:
6541   case OMPD_cancellation_point:
6542   case OMPD_ordered:
6543   case OMPD_threadprivate:
6544   case OMPD_allocate:
6545   case OMPD_task:
6546   case OMPD_simd:
6547   case OMPD_sections:
6548   case OMPD_section:
6549   case OMPD_single:
6550   case OMPD_master:
6551   case OMPD_critical:
6552   case OMPD_taskyield:
6553   case OMPD_barrier:
6554   case OMPD_taskwait:
6555   case OMPD_taskgroup:
6556   case OMPD_atomic:
6557   case OMPD_flush:
6558   case OMPD_depobj:
6559   case OMPD_scan:
6560   case OMPD_teams:
6561   case OMPD_target_data:
6562   case OMPD_target_exit_data:
6563   case OMPD_target_enter_data:
6564   case OMPD_distribute:
6565   case OMPD_distribute_simd:
6566   case OMPD_distribute_parallel_for:
6567   case OMPD_distribute_parallel_for_simd:
6568   case OMPD_teams_distribute:
6569   case OMPD_teams_distribute_simd:
6570   case OMPD_teams_distribute_parallel_for:
6571   case OMPD_teams_distribute_parallel_for_simd:
6572   case OMPD_target_update:
6573   case OMPD_declare_simd:
6574   case OMPD_declare_variant:
6575   case OMPD_begin_declare_variant:
6576   case OMPD_end_declare_variant:
6577   case OMPD_declare_target:
6578   case OMPD_end_declare_target:
6579   case OMPD_declare_reduction:
6580   case OMPD_declare_mapper:
6581   case OMPD_taskloop:
6582   case OMPD_taskloop_simd:
6583   case OMPD_master_taskloop:
6584   case OMPD_master_taskloop_simd:
6585   case OMPD_parallel_master_taskloop:
6586   case OMPD_parallel_master_taskloop_simd:
6587   case OMPD_requires:
6588   case OMPD_unknown:
6589     break;
6590   default:
6591     break;
6592   }
6593   llvm_unreachable("Unexpected directive kind.");
6594 }
6595 
6596 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6597                                   llvm::Value *DefaultThreadLimitVal) {
6598   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6599       CGF.getContext(), CS->getCapturedStmt());
6600   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6601     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6602       llvm::Value *NumThreads = nullptr;
6603       llvm::Value *CondVal = nullptr;
6604       // Handle if clause. If if clause present, the number of threads is
6605       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6606       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6607         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6608         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6609         const OMPIfClause *IfClause = nullptr;
6610         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6611           if (C->getNameModifier() == OMPD_unknown ||
6612               C->getNameModifier() == OMPD_parallel) {
6613             IfClause = C;
6614             break;
6615           }
6616         }
6617         if (IfClause) {
6618           const Expr *Cond = IfClause->getCondition();
6619           bool Result;
6620           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6621             if (!Result)
6622               return CGF.Builder.getInt32(1);
6623           } else {
6624             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6625             if (const auto *PreInit =
6626                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6627               for (const auto *I : PreInit->decls()) {
6628                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6629                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6630                 } else {
6631                   CodeGenFunction::AutoVarEmission Emission =
6632                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6633                   CGF.EmitAutoVarCleanups(Emission);
6634                 }
6635               }
6636             }
6637             CondVal = CGF.EvaluateExprAsBool(Cond);
6638           }
6639         }
6640       }
6641       // Check the value of num_threads clause iff if clause was not specified
6642       // or is not evaluated to false.
6643       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6644         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6645         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6646         const auto *NumThreadsClause =
6647             Dir->getSingleClause<OMPNumThreadsClause>();
6648         CodeGenFunction::LexicalScope Scope(
6649             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6650         if (const auto *PreInit =
6651                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6652           for (const auto *I : PreInit->decls()) {
6653             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6654               CGF.EmitVarDecl(cast<VarDecl>(*I));
6655             } else {
6656               CodeGenFunction::AutoVarEmission Emission =
6657                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6658               CGF.EmitAutoVarCleanups(Emission);
6659             }
6660           }
6661         }
6662         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6663         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6664                                                /*isSigned=*/false);
6665         if (DefaultThreadLimitVal)
6666           NumThreads = CGF.Builder.CreateSelect(
6667               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6668               DefaultThreadLimitVal, NumThreads);
6669       } else {
6670         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6671                                            : CGF.Builder.getInt32(0);
6672       }
6673       // Process condition of the if clause.
6674       if (CondVal) {
6675         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6676                                               CGF.Builder.getInt32(1));
6677       }
6678       return NumThreads;
6679     }
6680     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6681       return CGF.Builder.getInt32(1);
6682     return DefaultThreadLimitVal;
6683   }
6684   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6685                                : CGF.Builder.getInt32(0);
6686 }
6687 
6688 /// Emit the number of threads for a target directive.  Inspect the
6689 /// thread_limit clause associated with a teams construct combined or closely
6690 /// nested with the target directive.
6691 ///
6692 /// Emit the num_threads clause for directives such as 'target parallel' that
6693 /// have no associated teams construct.
6694 ///
6695 /// Otherwise, return nullptr.
6696 static llvm::Value *
6697 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6698                                  const OMPExecutableDirective &D) {
6699   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6700          "Clauses associated with the teams directive expected to be emitted "
6701          "only for the host!");
6702   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6703   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6704          "Expected target-based executable directive.");
6705   CGBuilderTy &Bld = CGF.Builder;
6706   llvm::Value *ThreadLimitVal = nullptr;
6707   llvm::Value *NumThreadsVal = nullptr;
6708   switch (DirectiveKind) {
6709   case OMPD_target: {
6710     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6711     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6712       return NumThreads;
6713     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6714         CGF.getContext(), CS->getCapturedStmt());
6715     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6716       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6717         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6718         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6719         const auto *ThreadLimitClause =
6720             Dir->getSingleClause<OMPThreadLimitClause>();
6721         CodeGenFunction::LexicalScope Scope(
6722             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6723         if (const auto *PreInit =
6724                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6725           for (const auto *I : PreInit->decls()) {
6726             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6727               CGF.EmitVarDecl(cast<VarDecl>(*I));
6728             } else {
6729               CodeGenFunction::AutoVarEmission Emission =
6730                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6731               CGF.EmitAutoVarCleanups(Emission);
6732             }
6733           }
6734         }
6735         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6736             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6737         ThreadLimitVal =
6738             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6739       }
6740       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6741           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6742         CS = Dir->getInnermostCapturedStmt();
6743         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6744             CGF.getContext(), CS->getCapturedStmt());
6745         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6746       }
6747       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6748           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6749         CS = Dir->getInnermostCapturedStmt();
6750         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6751           return NumThreads;
6752       }
6753       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6754         return Bld.getInt32(1);
6755     }
6756     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6757   }
6758   case OMPD_target_teams: {
6759     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6760       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6761       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6762       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6763           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6764       ThreadLimitVal =
6765           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6766     }
6767     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6768     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6769       return NumThreads;
6770     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6771         CGF.getContext(), CS->getCapturedStmt());
6772     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6773       if (Dir->getDirectiveKind() == OMPD_distribute) {
6774         CS = Dir->getInnermostCapturedStmt();
6775         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6776           return NumThreads;
6777       }
6778     }
6779     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6780   }
6781   case OMPD_target_teams_distribute:
6782     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6783       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6784       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6785       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6786           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6787       ThreadLimitVal =
6788           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6789     }
6790     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6791   case OMPD_target_parallel:
6792   case OMPD_target_parallel_for:
6793   case OMPD_target_parallel_for_simd:
6794   case OMPD_target_teams_distribute_parallel_for:
6795   case OMPD_target_teams_distribute_parallel_for_simd: {
6796     llvm::Value *CondVal = nullptr;
6797     // Handle if clause. If if clause present, the number of threads is
6798     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6799     if (D.hasClausesOfKind<OMPIfClause>()) {
6800       const OMPIfClause *IfClause = nullptr;
6801       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6802         if (C->getNameModifier() == OMPD_unknown ||
6803             C->getNameModifier() == OMPD_parallel) {
6804           IfClause = C;
6805           break;
6806         }
6807       }
6808       if (IfClause) {
6809         const Expr *Cond = IfClause->getCondition();
6810         bool Result;
6811         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6812           if (!Result)
6813             return Bld.getInt32(1);
6814         } else {
6815           CodeGenFunction::RunCleanupsScope Scope(CGF);
6816           CondVal = CGF.EvaluateExprAsBool(Cond);
6817         }
6818       }
6819     }
6820     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6821       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6822       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6823       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6824           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6825       ThreadLimitVal =
6826           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6827     }
6828     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6829       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6830       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6831       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6832           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6833       NumThreadsVal =
6834           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6835       ThreadLimitVal = ThreadLimitVal
6836                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6837                                                                 ThreadLimitVal),
6838                                               NumThreadsVal, ThreadLimitVal)
6839                            : NumThreadsVal;
6840     }
6841     if (!ThreadLimitVal)
6842       ThreadLimitVal = Bld.getInt32(0);
6843     if (CondVal)
6844       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6845     return ThreadLimitVal;
6846   }
6847   case OMPD_target_teams_distribute_simd:
6848   case OMPD_target_simd:
6849     return Bld.getInt32(1);
6850   case OMPD_parallel:
6851   case OMPD_for:
6852   case OMPD_parallel_for:
6853   case OMPD_parallel_master:
6854   case OMPD_parallel_sections:
6855   case OMPD_for_simd:
6856   case OMPD_parallel_for_simd:
6857   case OMPD_cancel:
6858   case OMPD_cancellation_point:
6859   case OMPD_ordered:
6860   case OMPD_threadprivate:
6861   case OMPD_allocate:
6862   case OMPD_task:
6863   case OMPD_simd:
6864   case OMPD_sections:
6865   case OMPD_section:
6866   case OMPD_single:
6867   case OMPD_master:
6868   case OMPD_critical:
6869   case OMPD_taskyield:
6870   case OMPD_barrier:
6871   case OMPD_taskwait:
6872   case OMPD_taskgroup:
6873   case OMPD_atomic:
6874   case OMPD_flush:
6875   case OMPD_depobj:
6876   case OMPD_scan:
6877   case OMPD_teams:
6878   case OMPD_target_data:
6879   case OMPD_target_exit_data:
6880   case OMPD_target_enter_data:
6881   case OMPD_distribute:
6882   case OMPD_distribute_simd:
6883   case OMPD_distribute_parallel_for:
6884   case OMPD_distribute_parallel_for_simd:
6885   case OMPD_teams_distribute:
6886   case OMPD_teams_distribute_simd:
6887   case OMPD_teams_distribute_parallel_for:
6888   case OMPD_teams_distribute_parallel_for_simd:
6889   case OMPD_target_update:
6890   case OMPD_declare_simd:
6891   case OMPD_declare_variant:
6892   case OMPD_begin_declare_variant:
6893   case OMPD_end_declare_variant:
6894   case OMPD_declare_target:
6895   case OMPD_end_declare_target:
6896   case OMPD_declare_reduction:
6897   case OMPD_declare_mapper:
6898   case OMPD_taskloop:
6899   case OMPD_taskloop_simd:
6900   case OMPD_master_taskloop:
6901   case OMPD_master_taskloop_simd:
6902   case OMPD_parallel_master_taskloop:
6903   case OMPD_parallel_master_taskloop_simd:
6904   case OMPD_requires:
6905   case OMPD_unknown:
6906     break;
6907   default:
6908     break;
6909   }
6910   llvm_unreachable("Unsupported directive kind.");
6911 }
6912 
6913 namespace {
6914 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6915 
6916 // Utility to handle information from clauses associated with a given
6917 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6918 // It provides a convenient interface to obtain the information and generate
6919 // code for that information.
6920 class MappableExprsHandler {
6921 public:
6922   /// Values for bit flags used to specify the mapping type for
6923   /// offloading.
6924   enum OpenMPOffloadMappingFlags : uint64_t {
6925     /// No flags
6926     OMP_MAP_NONE = 0x0,
6927     /// Allocate memory on the device and move data from host to device.
6928     OMP_MAP_TO = 0x01,
6929     /// Allocate memory on the device and move data from device to host.
6930     OMP_MAP_FROM = 0x02,
6931     /// Always perform the requested mapping action on the element, even
6932     /// if it was already mapped before.
6933     OMP_MAP_ALWAYS = 0x04,
6934     /// Delete the element from the device environment, ignoring the
6935     /// current reference count associated with the element.
6936     OMP_MAP_DELETE = 0x08,
6937     /// The element being mapped is a pointer-pointee pair; both the
6938     /// pointer and the pointee should be mapped.
6939     OMP_MAP_PTR_AND_OBJ = 0x10,
6940     /// This flags signals that the base address of an entry should be
6941     /// passed to the target kernel as an argument.
6942     OMP_MAP_TARGET_PARAM = 0x20,
6943     /// Signal that the runtime library has to return the device pointer
6944     /// in the current position for the data being mapped. Used when we have the
6945     /// use_device_ptr or use_device_addr clause.
6946     OMP_MAP_RETURN_PARAM = 0x40,
6947     /// This flag signals that the reference being passed is a pointer to
6948     /// private data.
6949     OMP_MAP_PRIVATE = 0x80,
6950     /// Pass the element to the device by value.
6951     OMP_MAP_LITERAL = 0x100,
6952     /// Implicit map
6953     OMP_MAP_IMPLICIT = 0x200,
6954     /// Close is a hint to the runtime to allocate memory close to
6955     /// the target device.
6956     OMP_MAP_CLOSE = 0x400,
6957     /// The 16 MSBs of the flags indicate whether the entry is member of some
6958     /// struct/class.
6959     OMP_MAP_MEMBER_OF = 0xffff000000000000,
6960     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
6961   };
6962 
6963   /// Get the offset of the OMP_MAP_MEMBER_OF field.
6964   static unsigned getFlagMemberOffset() {
6965     unsigned Offset = 0;
6966     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
6967          Remain = Remain >> 1)
6968       Offset++;
6969     return Offset;
6970   }
6971 
6972   /// Class that associates information with a base pointer to be passed to the
6973   /// runtime library.
6974   class BasePointerInfo {
6975     /// The base pointer.
6976     llvm::Value *Ptr = nullptr;
6977     /// The base declaration that refers to this device pointer, or null if
6978     /// there is none.
6979     const ValueDecl *DevPtrDecl = nullptr;
6980 
6981   public:
6982     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6983         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6984     llvm::Value *operator*() const { return Ptr; }
6985     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6986     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6987   };
6988 
6989   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6990   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6991   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6992 
6993   /// Map between a struct and the its lowest & highest elements which have been
6994   /// mapped.
6995   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6996   ///                    HE(FieldIndex, Pointer)}
6997   struct StructRangeInfoTy {
6998     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6999         0, Address::invalid()};
7000     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7001         0, Address::invalid()};
7002     Address Base = Address::invalid();
7003   };
7004 
7005 private:
7006   /// Kind that defines how a device pointer has to be returned.
7007   struct MapInfo {
7008     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7009     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7010     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7011     bool ReturnDevicePointer = false;
7012     bool IsImplicit = false;
7013     bool ForDeviceAddr = false;
7014 
7015     MapInfo() = default;
7016     MapInfo(
7017         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7018         OpenMPMapClauseKind MapType,
7019         ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
7020         bool IsImplicit, bool ForDeviceAddr = false)
7021         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7022           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7023           ForDeviceAddr(ForDeviceAddr) {}
7024   };
7025 
7026   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7027   /// member and there is no map information about it, then emission of that
7028   /// entry is deferred until the whole struct has been processed.
7029   struct DeferredDevicePtrEntryTy {
7030     const Expr *IE = nullptr;
7031     const ValueDecl *VD = nullptr;
7032     bool ForDeviceAddr = false;
7033 
7034     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7035                              bool ForDeviceAddr)
7036         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7037   };
7038 
7039   /// The target directive from where the mappable clauses were extracted. It
7040   /// is either a executable directive or a user-defined mapper directive.
7041   llvm::PointerUnion<const OMPExecutableDirective *,
7042                      const OMPDeclareMapperDecl *>
7043       CurDir;
7044 
7045   /// Function the directive is being generated for.
7046   CodeGenFunction &CGF;
7047 
7048   /// Set of all first private variables in the current directive.
7049   /// bool data is set to true if the variable is implicitly marked as
7050   /// firstprivate, false otherwise.
7051   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7052 
7053   /// Map between device pointer declarations and their expression components.
7054   /// The key value for declarations in 'this' is null.
7055   llvm::DenseMap<
7056       const ValueDecl *,
7057       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7058       DevPointersMap;
7059 
7060   llvm::Value *getExprTypeSize(const Expr *E) const {
7061     QualType ExprTy = E->getType().getCanonicalType();
7062 
7063     // Calculate the size for array shaping expression.
7064     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7065       llvm::Value *Size =
7066           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7067       for (const Expr *SE : OAE->getDimensions()) {
7068         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7069         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7070                                       CGF.getContext().getSizeType(),
7071                                       SE->getExprLoc());
7072         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7073       }
7074       return Size;
7075     }
7076 
7077     // Reference types are ignored for mapping purposes.
7078     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7079       ExprTy = RefTy->getPointeeType().getCanonicalType();
7080 
7081     // Given that an array section is considered a built-in type, we need to
7082     // do the calculation based on the length of the section instead of relying
7083     // on CGF.getTypeSize(E->getType()).
7084     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7085       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7086                             OAE->getBase()->IgnoreParenImpCasts())
7087                             .getCanonicalType();
7088 
7089       // If there is no length associated with the expression and lower bound is
7090       // not specified too, that means we are using the whole length of the
7091       // base.
7092       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7093           !OAE->getLowerBound())
7094         return CGF.getTypeSize(BaseTy);
7095 
7096       llvm::Value *ElemSize;
7097       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7098         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7099       } else {
7100         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7101         assert(ATy && "Expecting array type if not a pointer type.");
7102         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7103       }
7104 
7105       // If we don't have a length at this point, that is because we have an
7106       // array section with a single element.
7107       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7108         return ElemSize;
7109 
7110       if (const Expr *LenExpr = OAE->getLength()) {
7111         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7112         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7113                                              CGF.getContext().getSizeType(),
7114                                              LenExpr->getExprLoc());
7115         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7116       }
7117       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7118              OAE->getLowerBound() && "expected array_section[lb:].");
7119       // Size = sizetype - lb * elemtype;
7120       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7121       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7122       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7123                                        CGF.getContext().getSizeType(),
7124                                        OAE->getLowerBound()->getExprLoc());
7125       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7126       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7127       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7128       LengthVal = CGF.Builder.CreateSelect(
7129           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7130       return LengthVal;
7131     }
7132     return CGF.getTypeSize(ExprTy);
7133   }
7134 
7135   /// Return the corresponding bits for a given map clause modifier. Add
7136   /// a flag marking the map as a pointer if requested. Add a flag marking the
7137   /// map as the first one of a series of maps that relate to the same map
7138   /// expression.
7139   OpenMPOffloadMappingFlags getMapTypeBits(
7140       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7141       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7142     OpenMPOffloadMappingFlags Bits =
7143         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7144     switch (MapType) {
7145     case OMPC_MAP_alloc:
7146     case OMPC_MAP_release:
7147       // alloc and release is the default behavior in the runtime library,  i.e.
7148       // if we don't pass any bits alloc/release that is what the runtime is
7149       // going to do. Therefore, we don't need to signal anything for these two
7150       // type modifiers.
7151       break;
7152     case OMPC_MAP_to:
7153       Bits |= OMP_MAP_TO;
7154       break;
7155     case OMPC_MAP_from:
7156       Bits |= OMP_MAP_FROM;
7157       break;
7158     case OMPC_MAP_tofrom:
7159       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7160       break;
7161     case OMPC_MAP_delete:
7162       Bits |= OMP_MAP_DELETE;
7163       break;
7164     case OMPC_MAP_unknown:
7165       llvm_unreachable("Unexpected map type!");
7166     }
7167     if (AddPtrFlag)
7168       Bits |= OMP_MAP_PTR_AND_OBJ;
7169     if (AddIsTargetParamFlag)
7170       Bits |= OMP_MAP_TARGET_PARAM;
7171     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7172         != MapModifiers.end())
7173       Bits |= OMP_MAP_ALWAYS;
7174     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7175         != MapModifiers.end())
7176       Bits |= OMP_MAP_CLOSE;
7177     return Bits;
7178   }
7179 
7180   /// Return true if the provided expression is a final array section. A
7181   /// final array section, is one whose length can't be proved to be one.
7182   bool isFinalArraySectionExpression(const Expr *E) const {
7183     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7184 
7185     // It is not an array section and therefore not a unity-size one.
7186     if (!OASE)
7187       return false;
7188 
7189     // An array section with no colon always refer to a single element.
7190     if (OASE->getColonLocFirst().isInvalid())
7191       return false;
7192 
7193     const Expr *Length = OASE->getLength();
7194 
7195     // If we don't have a length we have to check if the array has size 1
7196     // for this dimension. Also, we should always expect a length if the
7197     // base type is pointer.
7198     if (!Length) {
7199       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7200                              OASE->getBase()->IgnoreParenImpCasts())
7201                              .getCanonicalType();
7202       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7203         return ATy->getSize().getSExtValue() != 1;
7204       // If we don't have a constant dimension length, we have to consider
7205       // the current section as having any size, so it is not necessarily
7206       // unitary. If it happen to be unity size, that's user fault.
7207       return true;
7208     }
7209 
7210     // Check if the length evaluates to 1.
7211     Expr::EvalResult Result;
7212     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7213       return true; // Can have more that size 1.
7214 
7215     llvm::APSInt ConstLength = Result.Val.getInt();
7216     return ConstLength.getSExtValue() != 1;
7217   }
7218 
7219   /// Generate the base pointers, section pointers, sizes and map type
7220   /// bits for the provided map type, map modifier, and expression components.
7221   /// \a IsFirstComponent should be set to true if the provided set of
7222   /// components is the first associated with a capture.
7223   void generateInfoForComponentList(
7224       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7226       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7227       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7228       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7229       bool IsImplicit, bool ForDeviceAddr = false,
7230       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7231           OverlappedElements = llvm::None) const {
7232     // The following summarizes what has to be generated for each map and the
7233     // types below. The generated information is expressed in this order:
7234     // base pointer, section pointer, size, flags
7235     // (to add to the ones that come from the map type and modifier).
7236     //
7237     // double d;
7238     // int i[100];
7239     // float *p;
7240     //
7241     // struct S1 {
7242     //   int i;
7243     //   float f[50];
7244     // }
7245     // struct S2 {
7246     //   int i;
7247     //   float f[50];
7248     //   S1 s;
7249     //   double *p;
7250     //   struct S2 *ps;
7251     // }
7252     // S2 s;
7253     // S2 *ps;
7254     //
7255     // map(d)
7256     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7257     //
7258     // map(i)
7259     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7260     //
7261     // map(i[1:23])
7262     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7263     //
7264     // map(p)
7265     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7266     //
7267     // map(p[1:24])
7268     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7269     // in unified shared memory mode or for local pointers
7270     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7271     //
7272     // map(s)
7273     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7274     //
7275     // map(s.i)
7276     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7277     //
7278     // map(s.s.f)
7279     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7280     //
7281     // map(s.p)
7282     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7283     //
7284     // map(to: s.p[:22])
7285     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7286     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7287     // &(s.p), &(s.p[0]), 22*sizeof(double),
7288     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7289     // (*) alloc space for struct members, only this is a target parameter
7290     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7291     //      optimizes this entry out, same in the examples below)
7292     // (***) map the pointee (map: to)
7293     //
7294     // map(s.ps)
7295     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7296     //
7297     // map(from: s.ps->s.i)
7298     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7299     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7300     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7301     //
7302     // map(to: s.ps->ps)
7303     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7304     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7305     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7306     //
7307     // map(s.ps->ps->ps)
7308     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7309     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7310     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7311     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7312     //
7313     // map(to: s.ps->ps->s.f[:22])
7314     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7315     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7316     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7317     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7318     //
7319     // map(ps)
7320     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7321     //
7322     // map(ps->i)
7323     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7324     //
7325     // map(ps->s.f)
7326     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7327     //
7328     // map(from: ps->p)
7329     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7330     //
7331     // map(to: ps->p[:22])
7332     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7333     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7334     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7335     //
7336     // map(ps->ps)
7337     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7338     //
7339     // map(from: ps->ps->s.i)
7340     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7341     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7342     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7343     //
7344     // map(from: ps->ps->ps)
7345     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7346     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7347     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7348     //
7349     // map(ps->ps->ps->ps)
7350     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7351     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7352     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7353     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7354     //
7355     // map(to: ps->ps->ps->s.f[:22])
7356     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7357     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7358     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7359     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7360     //
7361     // map(to: s.f[:22]) map(from: s.p[:33])
7362     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7363     //     sizeof(double*) (**), TARGET_PARAM
7364     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7365     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7366     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7367     // (*) allocate contiguous space needed to fit all mapped members even if
7368     //     we allocate space for members not mapped (in this example,
7369     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7370     //     them as well because they fall between &s.f[0] and &s.p)
7371     //
7372     // map(from: s.f[:22]) map(to: ps->p[:33])
7373     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7374     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7375     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7376     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7377     // (*) the struct this entry pertains to is the 2nd element in the list of
7378     //     arguments, hence MEMBER_OF(2)
7379     //
7380     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7381     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7382     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7383     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7384     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7385     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7386     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7387     // (*) the struct this entry pertains to is the 4th element in the list
7388     //     of arguments, hence MEMBER_OF(4)
7389 
7390     // Track if the map information being generated is the first for a capture.
7391     bool IsCaptureFirstInfo = IsFirstComponentList;
7392     // When the variable is on a declare target link or in a to clause with
7393     // unified memory, a reference is needed to hold the host/device address
7394     // of the variable.
7395     bool RequiresReference = false;
7396 
7397     // Scan the components from the base to the complete expression.
7398     auto CI = Components.rbegin();
7399     auto CE = Components.rend();
7400     auto I = CI;
7401 
7402     // Track if the map information being generated is the first for a list of
7403     // components.
7404     bool IsExpressionFirstInfo = true;
7405     bool FirstPointerInComplexData = false;
7406     Address BP = Address::invalid();
7407     const Expr *AssocExpr = I->getAssociatedExpression();
7408     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7409     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7410     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7411 
7412     if (isa<MemberExpr>(AssocExpr)) {
7413       // The base is the 'this' pointer. The content of the pointer is going
7414       // to be the base of the field being mapped.
7415       BP = CGF.LoadCXXThisAddress();
7416     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7417                (OASE &&
7418                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7419       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7420     } else if (OAShE &&
7421                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7422       BP = Address(
7423           CGF.EmitScalarExpr(OAShE->getBase()),
7424           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7425     } else {
7426       // The base is the reference to the variable.
7427       // BP = &Var.
7428       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7429       if (const auto *VD =
7430               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7431         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7432                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7433           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7434               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7435                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7436             RequiresReference = true;
7437             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7438           }
7439         }
7440       }
7441 
7442       // If the variable is a pointer and is being dereferenced (i.e. is not
7443       // the last component), the base has to be the pointer itself, not its
7444       // reference. References are ignored for mapping purposes.
7445       QualType Ty =
7446           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7447       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7448         // No need to generate individual map information for the pointer, it
7449         // can be associated with the combined storage if shared memory mode is
7450         // active or the base declaration is not global variable.
7451         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7452          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7453             !VD || VD->hasLocalStorage())
7454           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7455         else
7456           FirstPointerInComplexData = IsCaptureFirstInfo;
7457         ++I;
7458       }
7459     }
7460 
7461     // Track whether a component of the list should be marked as MEMBER_OF some
7462     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7463     // in a component list should be marked as MEMBER_OF, all subsequent entries
7464     // do not belong to the base struct. E.g.
7465     // struct S2 s;
7466     // s.ps->ps->ps->f[:]
7467     //   (1) (2) (3) (4)
7468     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7469     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7470     // is the pointee of ps(2) which is not member of struct s, so it should not
7471     // be marked as such (it is still PTR_AND_OBJ).
7472     // The variable is initialized to false so that PTR_AND_OBJ entries which
7473     // are not struct members are not considered (e.g. array of pointers to
7474     // data).
7475     bool ShouldBeMemberOf = false;
7476 
7477     // Variable keeping track of whether or not we have encountered a component
7478     // in the component list which is a member expression. Useful when we have a
7479     // pointer or a final array section, in which case it is the previous
7480     // component in the list which tells us whether we have a member expression.
7481     // E.g. X.f[:]
7482     // While processing the final array section "[:]" it is "f" which tells us
7483     // whether we are dealing with a member of a declared struct.
7484     const MemberExpr *EncounteredME = nullptr;
7485 
7486     for (; I != CE; ++I) {
7487       // If the current component is member of a struct (parent struct) mark it.
7488       if (!EncounteredME) {
7489         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7490         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7491         // as MEMBER_OF the parent struct.
7492         if (EncounteredME) {
7493           ShouldBeMemberOf = true;
7494           // Do not emit as complex pointer if this is actually not array-like
7495           // expression.
7496           if (FirstPointerInComplexData) {
7497             QualType Ty = std::prev(I)
7498                               ->getAssociatedDeclaration()
7499                               ->getType()
7500                               .getNonReferenceType();
7501             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7502             FirstPointerInComplexData = false;
7503           }
7504         }
7505       }
7506 
7507       auto Next = std::next(I);
7508 
7509       // We need to generate the addresses and sizes if this is the last
7510       // component, if the component is a pointer or if it is an array section
7511       // whose length can't be proved to be one. If this is a pointer, it
7512       // becomes the base address for the following components.
7513 
7514       // A final array section, is one whose length can't be proved to be one.
7515       bool IsFinalArraySection =
7516           isFinalArraySectionExpression(I->getAssociatedExpression());
7517 
7518       // Get information on whether the element is a pointer. Have to do a
7519       // special treatment for array sections given that they are built-in
7520       // types.
7521       const auto *OASE =
7522           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7523       const auto *OAShE =
7524           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7525       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7526       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7527       bool IsPointer =
7528           OAShE ||
7529           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7530                        .getCanonicalType()
7531                        ->isAnyPointerType()) ||
7532           I->getAssociatedExpression()->getType()->isAnyPointerType();
7533       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7534 
7535       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7536         // If this is not the last component, we expect the pointer to be
7537         // associated with an array expression or member expression.
7538         assert((Next == CE ||
7539                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7540                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7541                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7542                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7543                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7544                "Unexpected expression");
7545 
7546         Address LB = Address::invalid();
7547         if (OAShE) {
7548           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7549                        CGF.getContext().getTypeAlignInChars(
7550                            OAShE->getBase()->getType()));
7551         } else {
7552           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7553                    .getAddress(CGF);
7554         }
7555 
7556         // If this component is a pointer inside the base struct then we don't
7557         // need to create any entry for it - it will be combined with the object
7558         // it is pointing to into a single PTR_AND_OBJ entry.
7559         bool IsMemberPointerOrAddr =
7560             (IsPointer || ForDeviceAddr) && EncounteredME &&
7561             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7562              EncounteredME);
7563         if (!OverlappedElements.empty()) {
7564           // Handle base element with the info for overlapped elements.
7565           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7566           assert(Next == CE &&
7567                  "Expected last element for the overlapped elements.");
7568           assert(!IsPointer &&
7569                  "Unexpected base element with the pointer type.");
7570           // Mark the whole struct as the struct that requires allocation on the
7571           // device.
7572           PartialStruct.LowestElem = {0, LB};
7573           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7574               I->getAssociatedExpression()->getType());
7575           Address HB = CGF.Builder.CreateConstGEP(
7576               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7577                                                               CGF.VoidPtrTy),
7578               TypeSize.getQuantity() - 1);
7579           PartialStruct.HighestElem = {
7580               std::numeric_limits<decltype(
7581                   PartialStruct.HighestElem.first)>::max(),
7582               HB};
7583           PartialStruct.Base = BP;
7584           // Emit data for non-overlapped data.
7585           OpenMPOffloadMappingFlags Flags =
7586               OMP_MAP_MEMBER_OF |
7587               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7588                              /*AddPtrFlag=*/false,
7589                              /*AddIsTargetParamFlag=*/false);
7590           LB = BP;
7591           llvm::Value *Size = nullptr;
7592           // Do bitcopy of all non-overlapped structure elements.
7593           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7594                    Component : OverlappedElements) {
7595             Address ComponentLB = Address::invalid();
7596             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7597                  Component) {
7598               if (MC.getAssociatedDeclaration()) {
7599                 ComponentLB =
7600                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7601                         .getAddress(CGF);
7602                 Size = CGF.Builder.CreatePtrDiff(
7603                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7604                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7605                 break;
7606               }
7607             }
7608             BasePointers.push_back(BP.getPointer());
7609             Pointers.push_back(LB.getPointer());
7610             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7611                                                       /*isSigned=*/true));
7612             Types.push_back(Flags);
7613             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7614           }
7615           BasePointers.push_back(BP.getPointer());
7616           Pointers.push_back(LB.getPointer());
7617           Size = CGF.Builder.CreatePtrDiff(
7618               CGF.EmitCastToVoidPtr(
7619                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7620               CGF.EmitCastToVoidPtr(LB.getPointer()));
7621           Sizes.push_back(
7622               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7623           Types.push_back(Flags);
7624           break;
7625         }
7626         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7627         if (!IsMemberPointerOrAddr) {
7628           BasePointers.push_back(BP.getPointer());
7629           Pointers.push_back(LB.getPointer());
7630           Sizes.push_back(
7631               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7632 
7633           // We need to add a pointer flag for each map that comes from the
7634           // same expression except for the first one. We also need to signal
7635           // this map is the first one that relates with the current capture
7636           // (there is a set of entries for each capture).
7637           OpenMPOffloadMappingFlags Flags =
7638               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7639                              !IsExpressionFirstInfo || RequiresReference ||
7640                                  FirstPointerInComplexData,
7641                              IsCaptureFirstInfo && !RequiresReference);
7642 
7643           if (!IsExpressionFirstInfo) {
7644             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7645             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7646             if (IsPointer)
7647               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7648                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7649 
7650             if (ShouldBeMemberOf) {
7651               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7652               // should be later updated with the correct value of MEMBER_OF.
7653               Flags |= OMP_MAP_MEMBER_OF;
7654               // From now on, all subsequent PTR_AND_OBJ entries should not be
7655               // marked as MEMBER_OF.
7656               ShouldBeMemberOf = false;
7657             }
7658           }
7659 
7660           Types.push_back(Flags);
7661         }
7662 
7663         // If we have encountered a member expression so far, keep track of the
7664         // mapped member. If the parent is "*this", then the value declaration
7665         // is nullptr.
7666         if (EncounteredME) {
7667           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7668           unsigned FieldIndex = FD->getFieldIndex();
7669 
7670           // Update info about the lowest and highest elements for this struct
7671           if (!PartialStruct.Base.isValid()) {
7672             PartialStruct.LowestElem = {FieldIndex, LB};
7673             if (IsFinalArraySection) {
7674               Address HB =
7675                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7676                       .getAddress(CGF);
7677               PartialStruct.HighestElem = {FieldIndex, HB};
7678             } else {
7679               PartialStruct.HighestElem = {FieldIndex, LB};
7680             }
7681             PartialStruct.Base = BP;
7682           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7683             PartialStruct.LowestElem = {FieldIndex, LB};
7684           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7685             PartialStruct.HighestElem = {FieldIndex, LB};
7686           }
7687         }
7688 
7689         // If we have a final array section, we are done with this expression.
7690         if (IsFinalArraySection)
7691           break;
7692 
7693         // The pointer becomes the base for the next element.
7694         if (Next != CE)
7695           BP = LB;
7696 
7697         IsExpressionFirstInfo = false;
7698         IsCaptureFirstInfo = false;
7699         FirstPointerInComplexData = false;
7700       }
7701     }
7702   }
7703 
7704   /// Return the adjusted map modifiers if the declaration a capture refers to
7705   /// appears in a first-private clause. This is expected to be used only with
7706   /// directives that start with 'target'.
7707   MappableExprsHandler::OpenMPOffloadMappingFlags
7708   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7709     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7710 
7711     // A first private variable captured by reference will use only the
7712     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7713     // declaration is known as first-private in this handler.
7714     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7715       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7716           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7717         return MappableExprsHandler::OMP_MAP_ALWAYS |
7718                MappableExprsHandler::OMP_MAP_TO;
7719       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7720         return MappableExprsHandler::OMP_MAP_TO |
7721                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7722       return MappableExprsHandler::OMP_MAP_PRIVATE |
7723              MappableExprsHandler::OMP_MAP_TO;
7724     }
7725     return MappableExprsHandler::OMP_MAP_TO |
7726            MappableExprsHandler::OMP_MAP_FROM;
7727   }
7728 
7729   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7730     // Rotate by getFlagMemberOffset() bits.
7731     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7732                                                   << getFlagMemberOffset());
7733   }
7734 
7735   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7736                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7737     // If the entry is PTR_AND_OBJ but has not been marked with the special
7738     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7739     // marked as MEMBER_OF.
7740     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7741         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7742       return;
7743 
7744     // Reset the placeholder value to prepare the flag for the assignment of the
7745     // proper MEMBER_OF value.
7746     Flags &= ~OMP_MAP_MEMBER_OF;
7747     Flags |= MemberOfFlag;
7748   }
7749 
7750   void getPlainLayout(const CXXRecordDecl *RD,
7751                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7752                       bool AsBase) const {
7753     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7754 
7755     llvm::StructType *St =
7756         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7757 
7758     unsigned NumElements = St->getNumElements();
7759     llvm::SmallVector<
7760         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7761         RecordLayout(NumElements);
7762 
7763     // Fill bases.
7764     for (const auto &I : RD->bases()) {
7765       if (I.isVirtual())
7766         continue;
7767       const auto *Base = I.getType()->getAsCXXRecordDecl();
7768       // Ignore empty bases.
7769       if (Base->isEmpty() || CGF.getContext()
7770                                  .getASTRecordLayout(Base)
7771                                  .getNonVirtualSize()
7772                                  .isZero())
7773         continue;
7774 
7775       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7776       RecordLayout[FieldIndex] = Base;
7777     }
7778     // Fill in virtual bases.
7779     for (const auto &I : RD->vbases()) {
7780       const auto *Base = I.getType()->getAsCXXRecordDecl();
7781       // Ignore empty bases.
7782       if (Base->isEmpty())
7783         continue;
7784       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7785       if (RecordLayout[FieldIndex])
7786         continue;
7787       RecordLayout[FieldIndex] = Base;
7788     }
7789     // Fill in all the fields.
7790     assert(!RD->isUnion() && "Unexpected union.");
7791     for (const auto *Field : RD->fields()) {
7792       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7793       // will fill in later.)
7794       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7795         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7796         RecordLayout[FieldIndex] = Field;
7797       }
7798     }
7799     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7800              &Data : RecordLayout) {
7801       if (Data.isNull())
7802         continue;
7803       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7804         getPlainLayout(Base, Layout, /*AsBase=*/true);
7805       else
7806         Layout.push_back(Data.get<const FieldDecl *>());
7807     }
7808   }
7809 
7810 public:
7811   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7812       : CurDir(&Dir), CGF(CGF) {
7813     // Extract firstprivate clause information.
7814     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7815       for (const auto *D : C->varlists())
7816         FirstPrivateDecls.try_emplace(
7817             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7818     // Extract implicit firstprivates from uses_allocators clauses.
7819     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7820       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7821         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7822         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7823           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7824                                         /*Implicit=*/true);
7825         else if (const auto *VD = dyn_cast<VarDecl>(
7826                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7827                          ->getDecl()))
7828           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7829       }
7830     }
7831     // Extract device pointer clause information.
7832     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7833       for (auto L : C->component_lists())
7834         DevPointersMap[L.first].push_back(L.second);
7835   }
7836 
7837   /// Constructor for the declare mapper directive.
7838   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7839       : CurDir(&Dir), CGF(CGF) {}
7840 
7841   /// Generate code for the combined entry if we have a partially mapped struct
7842   /// and take care of the mapping flags of the arguments corresponding to
7843   /// individual struct members.
7844   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7845                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7846                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7847                          const StructRangeInfoTy &PartialStruct) const {
7848     // Base is the base of the struct
7849     BasePointers.push_back(PartialStruct.Base.getPointer());
7850     // Pointer is the address of the lowest element
7851     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7852     Pointers.push_back(LB);
7853     // Size is (addr of {highest+1} element) - (addr of lowest element)
7854     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7855     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7856     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7857     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7858     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7859     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7860                                                   /*isSigned=*/false);
7861     Sizes.push_back(Size);
7862     // Map type is always TARGET_PARAM
7863     Types.push_back(OMP_MAP_TARGET_PARAM);
7864     // Remove TARGET_PARAM flag from the first element
7865     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7866 
7867     // All other current entries will be MEMBER_OF the combined entry
7868     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7869     // 0xFFFF in the MEMBER_OF field).
7870     OpenMPOffloadMappingFlags MemberOfFlag =
7871         getMemberOfFlag(BasePointers.size() - 1);
7872     for (auto &M : CurTypes)
7873       setCorrectMemberOfFlag(M, MemberOfFlag);
7874   }
7875 
7876   /// Generate all the base pointers, section pointers, sizes and map
7877   /// types for the extracted mappable expressions. Also, for each item that
7878   /// relates with a device pointer, a pair of the relevant declaration and
7879   /// index where it occurs is appended to the device pointers info array.
7880   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7881                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7882                        MapFlagsArrayTy &Types) const {
7883     // We have to process the component lists that relate with the same
7884     // declaration in a single chunk so that we can generate the map flags
7885     // correctly. Therefore, we organize all lists in a map.
7886     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7887 
7888     // Helper function to fill the information map for the different supported
7889     // clauses.
7890     auto &&InfoGen =
7891         [&Info](const ValueDecl *D,
7892                 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7893                 OpenMPMapClauseKind MapType,
7894                 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7895                 bool ReturnDevicePointer, bool IsImplicit,
7896                 bool ForDeviceAddr = false) {
7897           const ValueDecl *VD =
7898               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7899           Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7900                                 IsImplicit, ForDeviceAddr);
7901         };
7902 
7903     assert(CurDir.is<const OMPExecutableDirective *>() &&
7904            "Expect a executable directive");
7905     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7906     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7907       for (const auto L : C->component_lists()) {
7908         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7909             /*ReturnDevicePointer=*/false, C->isImplicit());
7910       }
7911     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7912       for (const auto L : C->component_lists()) {
7913         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7914             /*ReturnDevicePointer=*/false, C->isImplicit());
7915       }
7916     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7917       for (const auto L : C->component_lists()) {
7918         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7919             /*ReturnDevicePointer=*/false, C->isImplicit());
7920       }
7921 
7922     // Look at the use_device_ptr clause information and mark the existing map
7923     // entries as such. If there is no map information for an entry in the
7924     // use_device_ptr list, we create one with map type 'alloc' and zero size
7925     // section. It is the user fault if that was not mapped before. If there is
7926     // no map information and the pointer is a struct member, then we defer the
7927     // emission of that entry until the whole struct has been processed.
7928     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7929         DeferredInfo;
7930     MapBaseValuesArrayTy UseDevicePtrBasePointers;
7931     MapValuesArrayTy UseDevicePtrPointers;
7932     MapValuesArrayTy UseDevicePtrSizes;
7933     MapFlagsArrayTy UseDevicePtrTypes;
7934 
7935     for (const auto *C :
7936          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7937       for (const auto L : C->component_lists()) {
7938         assert(!L.second.empty() && "Not expecting empty list of components!");
7939         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7940         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7941         const Expr *IE = L.second.back().getAssociatedExpression();
7942         // If the first component is a member expression, we have to look into
7943         // 'this', which maps to null in the map of map information. Otherwise
7944         // look directly for the information.
7945         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7946 
7947         // We potentially have map information for this declaration already.
7948         // Look for the first set of components that refer to it.
7949         if (It != Info.end()) {
7950           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
7951             return MI.Components.back().getAssociatedDeclaration() == VD;
7952           });
7953           // If we found a map entry, signal that the pointer has to be returned
7954           // and move on to the next declaration.
7955           // Exclude cases where the base pointer is mapped as array subscript,
7956           // array section or array shaping. The base address is passed as a
7957           // pointer to base in this case and cannot be used as a base for
7958           // use_device_ptr list item.
7959           if (CI != It->second.end()) {
7960             auto PrevCI = std::next(CI->Components.rbegin());
7961             const auto *VarD = dyn_cast<VarDecl>(VD);
7962             if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7963                 isa<MemberExpr>(IE) ||
7964                 !VD->getType().getNonReferenceType()->isPointerType() ||
7965                 PrevCI == CI->Components.rend() ||
7966                 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7967                 VarD->hasLocalStorage()) {
7968               CI->ReturnDevicePointer = true;
7969               continue;
7970             }
7971           }
7972         }
7973 
7974         // We didn't find any match in our map information - generate a zero
7975         // size array section - if the pointer is a struct member we defer this
7976         // action until the whole struct has been processed.
7977         if (isa<MemberExpr>(IE)) {
7978           // Insert the pointer into Info to be processed by
7979           // generateInfoForComponentList. Because it is a member pointer
7980           // without a pointee, no entry will be generated for it, therefore
7981           // we need to generate one after the whole struct has been processed.
7982           // Nonetheless, generateInfoForComponentList must be called to take
7983           // the pointer into account for the calculation of the range of the
7984           // partial struct.
7985           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7986                   /*ReturnDevicePointer=*/false, C->isImplicit());
7987           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
7988         } else {
7989           llvm::Value *Ptr =
7990               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7991           UseDevicePtrBasePointers.emplace_back(Ptr, VD);
7992           UseDevicePtrPointers.push_back(Ptr);
7993           UseDevicePtrSizes.push_back(
7994               llvm::Constant::getNullValue(CGF.Int64Ty));
7995           UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
7996                                       OMP_MAP_TARGET_PARAM);
7997         }
7998       }
7999     }
8000 
8001     // Look at the use_device_addr clause information and mark the existing map
8002     // entries as such. If there is no map information for an entry in the
8003     // use_device_addr list, we create one with map type 'alloc' and zero size
8004     // section. It is the user's fault if that was not mapped before. If there is
8005     // no map information and the pointer is a struct member, then we defer the
8006     // emission of that entry until the whole struct has been processed.
8007     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8008     for (const auto *C :
8009          CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8010       for (const auto L : C->component_lists()) {
8011         assert(!L.second.empty() && "Not expecting empty list of components!");
8012         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8013         if (!Processed.insert(VD).second)
8014           continue;
8015         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8016         const Expr *IE = L.second.back().getAssociatedExpression();
8017         // If the first component is a member expression, we have to look into
8018         // 'this', which maps to null in the map of map information. Otherwise
8019         // look directly for the information.
8020         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8021 
8022         // We potentially have map information for this declaration already.
8023         // Look for the first set of components that refer to it.
8024         if (It != Info.end()) {
8025           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8026             return MI.Components.back().getAssociatedDeclaration() == VD;
8027           });
8028           // If we found a map entry, signal that the pointer has to be returned
8029           // and move on to the next declaration.
8030           if (CI != It->second.end()) {
8031             CI->ReturnDevicePointer = true;
8032             continue;
8033           }
8034         }
8035 
8036         // We didn't find any match in our map information - generate a zero
8037         // size array section - if the pointer is a struct member we defer this
8038         // action until the whole struct has been processed.
8039         if (isa<MemberExpr>(IE)) {
8040           // Insert the pointer into Info to be processed by
8041           // generateInfoForComponentList. Because it is a member pointer
8042           // without a pointee, no entry will be generated for it, therefore
8043           // we need to generate one after the whole struct has been processed.
8044           // Nonetheless, generateInfoForComponentList must be called to take
8045           // the pointer into account for the calculation of the range of the
8046           // partial struct.
8047           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8048                   /*ReturnDevicePointer=*/false, C->isImplicit(),
8049                   /*ForDeviceAddr=*/true);
8050           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8051         } else {
8052           llvm::Value *Ptr;
8053           if (IE->isGLValue())
8054             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8055           else
8056             Ptr = CGF.EmitScalarExpr(IE);
8057           UseDevicePtrBasePointers.emplace_back(Ptr, VD);
8058           UseDevicePtrPointers.push_back(Ptr);
8059           UseDevicePtrSizes.push_back(
8060               llvm::Constant::getNullValue(CGF.Int64Ty));
8061           UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
8062                                       OMP_MAP_TARGET_PARAM);
8063         }
8064       }
8065     }
8066 
8067     for (const auto &M : Info) {
8068       // We need to know when we generate information for the first component
8069       // associated with a capture, because the mapping flags depend on it.
8070       bool IsFirstComponentList = true;
8071 
8072       // Temporary versions of arrays
8073       MapBaseValuesArrayTy CurBasePointers;
8074       MapValuesArrayTy CurPointers;
8075       MapValuesArrayTy CurSizes;
8076       MapFlagsArrayTy CurTypes;
8077       StructRangeInfoTy PartialStruct;
8078 
8079       for (const MapInfo &L : M.second) {
8080         assert(!L.Components.empty() &&
8081                "Not expecting declaration with no component lists.");
8082 
8083         // Remember the current base pointer index.
8084         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8085         generateInfoForComponentList(
8086             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8087             CurPointers, CurSizes, CurTypes, PartialStruct,
8088             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8089 
8090         // If this entry relates with a device pointer, set the relevant
8091         // declaration and add the 'return pointer' flag.
8092         if (L.ReturnDevicePointer) {
8093           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8094                  "Unexpected number of mapped base pointers.");
8095 
8096           const ValueDecl *RelevantVD =
8097               L.Components.back().getAssociatedDeclaration();
8098           assert(RelevantVD &&
8099                  "No relevant declaration related with device pointer??");
8100 
8101           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8102           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8103         }
8104         IsFirstComponentList = false;
8105       }
8106 
8107       // Append any pending zero-length pointers which are struct members and
8108       // used with use_device_ptr or use_device_addr.
8109       auto CI = DeferredInfo.find(M.first);
8110       if (CI != DeferredInfo.end()) {
8111         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8112           llvm::Value *BasePtr;
8113           llvm::Value *Ptr;
8114           if (L.ForDeviceAddr) {
8115             if (L.IE->isGLValue())
8116               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8117             else
8118               Ptr = this->CGF.EmitScalarExpr(L.IE);
8119             BasePtr = Ptr;
8120             // Entry is RETURN_PARAM. Also, set the placeholder value
8121             // MEMBER_OF=FFFF so that the entry is later updated with the
8122             // correct value of MEMBER_OF.
8123             CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8124           } else {
8125             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8126             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8127                                              L.IE->getExprLoc());
8128             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8129             // value MEMBER_OF=FFFF so that the entry is later updated with the
8130             // correct value of MEMBER_OF.
8131             CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8132                                OMP_MAP_MEMBER_OF);
8133           }
8134           CurBasePointers.emplace_back(BasePtr, L.VD);
8135           CurPointers.push_back(Ptr);
8136           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8137         }
8138       }
8139 
8140       // If there is an entry in PartialStruct it means we have a struct with
8141       // individual members mapped. Emit an extra combined entry.
8142       if (PartialStruct.Base.isValid())
8143         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8144                           PartialStruct);
8145 
8146       // We need to append the results of this capture to what we already have.
8147       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8148       Pointers.append(CurPointers.begin(), CurPointers.end());
8149       Sizes.append(CurSizes.begin(), CurSizes.end());
8150       Types.append(CurTypes.begin(), CurTypes.end());
8151     }
8152     // Append data for use_device_ptr clauses.
8153     BasePointers.append(UseDevicePtrBasePointers.begin(),
8154                         UseDevicePtrBasePointers.end());
8155     Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end());
8156     Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end());
8157     Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end());
8158   }
8159 
8160   /// Generate all the base pointers, section pointers, sizes and map types for
8161   /// the extracted map clauses of user-defined mapper.
8162   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8163                                 MapValuesArrayTy &Pointers,
8164                                 MapValuesArrayTy &Sizes,
8165                                 MapFlagsArrayTy &Types) const {
8166     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8167            "Expect a declare mapper directive");
8168     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8169     // We have to process the component lists that relate with the same
8170     // declaration in a single chunk so that we can generate the map flags
8171     // correctly. Therefore, we organize all lists in a map.
8172     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8173 
8174     // Helper function to fill the information map for the different supported
8175     // clauses.
8176     auto &&InfoGen = [&Info](
8177         const ValueDecl *D,
8178         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8179         OpenMPMapClauseKind MapType,
8180         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8181         bool ReturnDevicePointer, bool IsImplicit) {
8182       const ValueDecl *VD =
8183           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8184       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8185                             IsImplicit);
8186     };
8187 
8188     for (const auto *C : CurMapperDir->clauselists()) {
8189       const auto *MC = cast<OMPMapClause>(C);
8190       for (const auto L : MC->component_lists()) {
8191         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8192                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8193       }
8194     }
8195 
8196     for (const auto &M : Info) {
8197       // We need to know when we generate information for the first component
8198       // associated with a capture, because the mapping flags depend on it.
8199       bool IsFirstComponentList = true;
8200 
8201       // Temporary versions of arrays
8202       MapBaseValuesArrayTy CurBasePointers;
8203       MapValuesArrayTy CurPointers;
8204       MapValuesArrayTy CurSizes;
8205       MapFlagsArrayTy CurTypes;
8206       StructRangeInfoTy PartialStruct;
8207 
8208       for (const MapInfo &L : M.second) {
8209         assert(!L.Components.empty() &&
8210                "Not expecting declaration with no component lists.");
8211         generateInfoForComponentList(
8212             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8213             CurPointers, CurSizes, CurTypes, PartialStruct,
8214             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8215         IsFirstComponentList = false;
8216       }
8217 
8218       // If there is an entry in PartialStruct it means we have a struct with
8219       // individual members mapped. Emit an extra combined entry.
8220       if (PartialStruct.Base.isValid())
8221         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8222                           PartialStruct);
8223 
8224       // We need to append the results of this capture to what we already have.
8225       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8226       Pointers.append(CurPointers.begin(), CurPointers.end());
8227       Sizes.append(CurSizes.begin(), CurSizes.end());
8228       Types.append(CurTypes.begin(), CurTypes.end());
8229     }
8230   }
8231 
8232   /// Emit capture info for lambdas for variables captured by reference.
8233   void generateInfoForLambdaCaptures(
8234       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8235       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8236       MapFlagsArrayTy &Types,
8237       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8238     const auto *RD = VD->getType()
8239                          .getCanonicalType()
8240                          .getNonReferenceType()
8241                          ->getAsCXXRecordDecl();
8242     if (!RD || !RD->isLambda())
8243       return;
8244     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8245     LValue VDLVal = CGF.MakeAddrLValue(
8246         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8247     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8248     FieldDecl *ThisCapture = nullptr;
8249     RD->getCaptureFields(Captures, ThisCapture);
8250     if (ThisCapture) {
8251       LValue ThisLVal =
8252           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8253       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8254       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8255                                  VDLVal.getPointer(CGF));
8256       BasePointers.push_back(ThisLVal.getPointer(CGF));
8257       Pointers.push_back(ThisLValVal.getPointer(CGF));
8258       Sizes.push_back(
8259           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8260                                     CGF.Int64Ty, /*isSigned=*/true));
8261       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8262                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8263     }
8264     for (const LambdaCapture &LC : RD->captures()) {
8265       if (!LC.capturesVariable())
8266         continue;
8267       const VarDecl *VD = LC.getCapturedVar();
8268       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8269         continue;
8270       auto It = Captures.find(VD);
8271       assert(It != Captures.end() && "Found lambda capture without field.");
8272       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8273       if (LC.getCaptureKind() == LCK_ByRef) {
8274         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8275         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8276                                    VDLVal.getPointer(CGF));
8277         BasePointers.push_back(VarLVal.getPointer(CGF));
8278         Pointers.push_back(VarLValVal.getPointer(CGF));
8279         Sizes.push_back(CGF.Builder.CreateIntCast(
8280             CGF.getTypeSize(
8281                 VD->getType().getCanonicalType().getNonReferenceType()),
8282             CGF.Int64Ty, /*isSigned=*/true));
8283       } else {
8284         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8285         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8286                                    VDLVal.getPointer(CGF));
8287         BasePointers.push_back(VarLVal.getPointer(CGF));
8288         Pointers.push_back(VarRVal.getScalarVal());
8289         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8290       }
8291       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8292                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8293     }
8294   }
8295 
8296   /// Set correct indices for lambdas captures.
8297   void adjustMemberOfForLambdaCaptures(
8298       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8299       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8300       MapFlagsArrayTy &Types) const {
8301     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8302       // Set correct member_of idx for all implicit lambda captures.
8303       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8304                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8305         continue;
8306       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8307       assert(BasePtr && "Unable to find base lambda address.");
8308       int TgtIdx = -1;
8309       for (unsigned J = I; J > 0; --J) {
8310         unsigned Idx = J - 1;
8311         if (Pointers[Idx] != BasePtr)
8312           continue;
8313         TgtIdx = Idx;
8314         break;
8315       }
8316       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8317       // All other current entries will be MEMBER_OF the combined entry
8318       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8319       // 0xFFFF in the MEMBER_OF field).
8320       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8321       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8322     }
8323   }
8324 
8325   /// Generate the base pointers, section pointers, sizes and map types
8326   /// associated to a given capture.
8327   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8328                               llvm::Value *Arg,
8329                               MapBaseValuesArrayTy &BasePointers,
8330                               MapValuesArrayTy &Pointers,
8331                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8332                               StructRangeInfoTy &PartialStruct) const {
8333     assert(!Cap->capturesVariableArrayType() &&
8334            "Not expecting to generate map info for a variable array type!");
8335 
8336     // We need to know when we generating information for the first component
8337     const ValueDecl *VD = Cap->capturesThis()
8338                               ? nullptr
8339                               : Cap->getCapturedVar()->getCanonicalDecl();
8340 
8341     // If this declaration appears in a is_device_ptr clause we just have to
8342     // pass the pointer by value. If it is a reference to a declaration, we just
8343     // pass its value.
8344     if (DevPointersMap.count(VD)) {
8345       BasePointers.emplace_back(Arg, VD);
8346       Pointers.push_back(Arg);
8347       Sizes.push_back(
8348           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8349                                     CGF.Int64Ty, /*isSigned=*/true));
8350       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8351       return;
8352     }
8353 
8354     using MapData =
8355         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8356                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8357     SmallVector<MapData, 4> DeclComponentLists;
8358     assert(CurDir.is<const OMPExecutableDirective *>() &&
8359            "Expect a executable directive");
8360     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8361     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8362       for (const auto L : C->decl_component_lists(VD)) {
8363         assert(L.first == VD &&
8364                "We got information for the wrong declaration??");
8365         assert(!L.second.empty() &&
8366                "Not expecting declaration with no component lists.");
8367         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8368                                         C->getMapTypeModifiers(),
8369                                         C->isImplicit());
8370       }
8371     }
8372 
8373     // Find overlapping elements (including the offset from the base element).
8374     llvm::SmallDenseMap<
8375         const MapData *,
8376         llvm::SmallVector<
8377             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8378         4>
8379         OverlappedData;
8380     size_t Count = 0;
8381     for (const MapData &L : DeclComponentLists) {
8382       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8383       OpenMPMapClauseKind MapType;
8384       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8385       bool IsImplicit;
8386       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8387       ++Count;
8388       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8389         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8390         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8391         auto CI = Components.rbegin();
8392         auto CE = Components.rend();
8393         auto SI = Components1.rbegin();
8394         auto SE = Components1.rend();
8395         for (; CI != CE && SI != SE; ++CI, ++SI) {
8396           if (CI->getAssociatedExpression()->getStmtClass() !=
8397               SI->getAssociatedExpression()->getStmtClass())
8398             break;
8399           // Are we dealing with different variables/fields?
8400           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8401             break;
8402         }
8403         // Found overlapping if, at least for one component, reached the head of
8404         // the components list.
8405         if (CI == CE || SI == SE) {
8406           assert((CI != CE || SI != SE) &&
8407                  "Unexpected full match of the mapping components.");
8408           const MapData &BaseData = CI == CE ? L : L1;
8409           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8410               SI == SE ? Components : Components1;
8411           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8412           OverlappedElements.getSecond().push_back(SubData);
8413         }
8414       }
8415     }
8416     // Sort the overlapped elements for each item.
8417     llvm::SmallVector<const FieldDecl *, 4> Layout;
8418     if (!OverlappedData.empty()) {
8419       if (const auto *CRD =
8420               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8421         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8422       else {
8423         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8424         Layout.append(RD->field_begin(), RD->field_end());
8425       }
8426     }
8427     for (auto &Pair : OverlappedData) {
8428       llvm::sort(
8429           Pair.getSecond(),
8430           [&Layout](
8431               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8432               OMPClauseMappableExprCommon::MappableExprComponentListRef
8433                   Second) {
8434             auto CI = First.rbegin();
8435             auto CE = First.rend();
8436             auto SI = Second.rbegin();
8437             auto SE = Second.rend();
8438             for (; CI != CE && SI != SE; ++CI, ++SI) {
8439               if (CI->getAssociatedExpression()->getStmtClass() !=
8440                   SI->getAssociatedExpression()->getStmtClass())
8441                 break;
8442               // Are we dealing with different variables/fields?
8443               if (CI->getAssociatedDeclaration() !=
8444                   SI->getAssociatedDeclaration())
8445                 break;
8446             }
8447 
8448             // Lists contain the same elements.
8449             if (CI == CE && SI == SE)
8450               return false;
8451 
8452             // List with less elements is less than list with more elements.
8453             if (CI == CE || SI == SE)
8454               return CI == CE;
8455 
8456             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8457             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8458             if (FD1->getParent() == FD2->getParent())
8459               return FD1->getFieldIndex() < FD2->getFieldIndex();
8460             const auto It =
8461                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8462                   return FD == FD1 || FD == FD2;
8463                 });
8464             return *It == FD1;
8465           });
8466     }
8467 
8468     // Associated with a capture, because the mapping flags depend on it.
8469     // Go through all of the elements with the overlapped elements.
8470     for (const auto &Pair : OverlappedData) {
8471       const MapData &L = *Pair.getFirst();
8472       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8473       OpenMPMapClauseKind MapType;
8474       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8475       bool IsImplicit;
8476       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8477       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8478           OverlappedComponents = Pair.getSecond();
8479       bool IsFirstComponentList = true;
8480       generateInfoForComponentList(
8481           MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
8482           Types, PartialStruct, IsFirstComponentList, IsImplicit,
8483           /*ForDeviceAddr=*/false, OverlappedComponents);
8484     }
8485     // Go through other elements without overlapped elements.
8486     bool IsFirstComponentList = OverlappedData.empty();
8487     for (const MapData &L : DeclComponentLists) {
8488       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8489       OpenMPMapClauseKind MapType;
8490       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8491       bool IsImplicit;
8492       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8493       auto It = OverlappedData.find(&L);
8494       if (It == OverlappedData.end())
8495         generateInfoForComponentList(MapType, MapModifiers, Components,
8496                                      BasePointers, Pointers, Sizes, Types,
8497                                      PartialStruct, IsFirstComponentList,
8498                                      IsImplicit);
8499       IsFirstComponentList = false;
8500     }
8501   }
8502 
8503   /// Generate the base pointers, section pointers, sizes and map types
8504   /// associated with the declare target link variables.
8505   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8506                                         MapValuesArrayTy &Pointers,
8507                                         MapValuesArrayTy &Sizes,
8508                                         MapFlagsArrayTy &Types) const {
8509     assert(CurDir.is<const OMPExecutableDirective *>() &&
8510            "Expect a executable directive");
8511     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8512     // Map other list items in the map clause which are not captured variables
8513     // but "declare target link" global variables.
8514     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8515       for (const auto L : C->component_lists()) {
8516         if (!L.first)
8517           continue;
8518         const auto *VD = dyn_cast<VarDecl>(L.first);
8519         if (!VD)
8520           continue;
8521         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8522             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8523         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8524             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8525           continue;
8526         StructRangeInfoTy PartialStruct;
8527         generateInfoForComponentList(
8528             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8529             Pointers, Sizes, Types, PartialStruct,
8530             /*IsFirstComponentList=*/true, C->isImplicit());
8531         assert(!PartialStruct.Base.isValid() &&
8532                "No partial structs for declare target link expected.");
8533       }
8534     }
8535   }
8536 
8537   /// Generate the default map information for a given capture \a CI,
8538   /// record field declaration \a RI and captured value \a CV.
8539   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8540                               const FieldDecl &RI, llvm::Value *CV,
8541                               MapBaseValuesArrayTy &CurBasePointers,
8542                               MapValuesArrayTy &CurPointers,
8543                               MapValuesArrayTy &CurSizes,
8544                               MapFlagsArrayTy &CurMapTypes) const {
8545     bool IsImplicit = true;
8546     // Do the default mapping.
8547     if (CI.capturesThis()) {
8548       CurBasePointers.push_back(CV);
8549       CurPointers.push_back(CV);
8550       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8551       CurSizes.push_back(
8552           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8553                                     CGF.Int64Ty, /*isSigned=*/true));
8554       // Default map type.
8555       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8556     } else if (CI.capturesVariableByCopy()) {
8557       CurBasePointers.push_back(CV);
8558       CurPointers.push_back(CV);
8559       if (!RI.getType()->isAnyPointerType()) {
8560         // We have to signal to the runtime captures passed by value that are
8561         // not pointers.
8562         CurMapTypes.push_back(OMP_MAP_LITERAL);
8563         CurSizes.push_back(CGF.Builder.CreateIntCast(
8564             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8565       } else {
8566         // Pointers are implicitly mapped with a zero size and no flags
8567         // (other than first map that is added for all implicit maps).
8568         CurMapTypes.push_back(OMP_MAP_NONE);
8569         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8570       }
8571       const VarDecl *VD = CI.getCapturedVar();
8572       auto I = FirstPrivateDecls.find(VD);
8573       if (I != FirstPrivateDecls.end())
8574         IsImplicit = I->getSecond();
8575     } else {
8576       assert(CI.capturesVariable() && "Expected captured reference.");
8577       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8578       QualType ElementType = PtrTy->getPointeeType();
8579       CurSizes.push_back(CGF.Builder.CreateIntCast(
8580           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8581       // The default map type for a scalar/complex type is 'to' because by
8582       // default the value doesn't have to be retrieved. For an aggregate
8583       // type, the default is 'tofrom'.
8584       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8585       const VarDecl *VD = CI.getCapturedVar();
8586       auto I = FirstPrivateDecls.find(VD);
8587       if (I != FirstPrivateDecls.end() &&
8588           VD->getType().isConstant(CGF.getContext())) {
8589         llvm::Constant *Addr =
8590             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8591         // Copy the value of the original variable to the new global copy.
8592         CGF.Builder.CreateMemCpy(
8593             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8594             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8595             CurSizes.back(), /*IsVolatile=*/false);
8596         // Use new global variable as the base pointers.
8597         CurBasePointers.push_back(Addr);
8598         CurPointers.push_back(Addr);
8599       } else {
8600         CurBasePointers.push_back(CV);
8601         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8602           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8603               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8604               AlignmentSource::Decl));
8605           CurPointers.push_back(PtrAddr.getPointer());
8606         } else {
8607           CurPointers.push_back(CV);
8608         }
8609       }
8610       if (I != FirstPrivateDecls.end())
8611         IsImplicit = I->getSecond();
8612     }
8613     // Every default map produces a single argument which is a target parameter.
8614     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8615 
8616     // Add flag stating this is an implicit map.
8617     if (IsImplicit)
8618       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8619   }
8620 };
8621 } // anonymous namespace
8622 
8623 /// Emit the arrays used to pass the captures and map information to the
8624 /// offloading runtime library. If there is no map or capture information,
8625 /// return nullptr by reference.
8626 static void
8627 emitOffloadingArrays(CodeGenFunction &CGF,
8628                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8629                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8630                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8631                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8632                      CGOpenMPRuntime::TargetDataInfo &Info) {
8633   CodeGenModule &CGM = CGF.CGM;
8634   ASTContext &Ctx = CGF.getContext();
8635 
8636   // Reset the array information.
8637   Info.clearArrayInfo();
8638   Info.NumberOfPtrs = BasePointers.size();
8639 
8640   if (Info.NumberOfPtrs) {
8641     // Detect if we have any capture size requiring runtime evaluation of the
8642     // size so that a constant array could be eventually used.
8643     bool hasRuntimeEvaluationCaptureSize = false;
8644     for (llvm::Value *S : Sizes)
8645       if (!isa<llvm::Constant>(S)) {
8646         hasRuntimeEvaluationCaptureSize = true;
8647         break;
8648       }
8649 
8650     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8651     QualType PointerArrayType = Ctx.getConstantArrayType(
8652         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8653         /*IndexTypeQuals=*/0);
8654 
8655     Info.BasePointersArray =
8656         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8657     Info.PointersArray =
8658         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8659 
8660     // If we don't have any VLA types or other types that require runtime
8661     // evaluation, we can use a constant array for the map sizes, otherwise we
8662     // need to fill up the arrays as we do for the pointers.
8663     QualType Int64Ty =
8664         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8665     if (hasRuntimeEvaluationCaptureSize) {
8666       QualType SizeArrayType = Ctx.getConstantArrayType(
8667           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8668           /*IndexTypeQuals=*/0);
8669       Info.SizesArray =
8670           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8671     } else {
8672       // We expect all the sizes to be constant, so we collect them to create
8673       // a constant array.
8674       SmallVector<llvm::Constant *, 16> ConstSizes;
8675       for (llvm::Value *S : Sizes)
8676         ConstSizes.push_back(cast<llvm::Constant>(S));
8677 
8678       auto *SizesArrayInit = llvm::ConstantArray::get(
8679           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8680       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8681       auto *SizesArrayGbl = new llvm::GlobalVariable(
8682           CGM.getModule(), SizesArrayInit->getType(),
8683           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8684           SizesArrayInit, Name);
8685       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8686       Info.SizesArray = SizesArrayGbl;
8687     }
8688 
8689     // The map types are always constant so we don't need to generate code to
8690     // fill arrays. Instead, we create an array constant.
8691     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8692     llvm::copy(MapTypes, Mapping.begin());
8693     llvm::Constant *MapTypesArrayInit =
8694         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8695     std::string MaptypesName =
8696         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8697     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8698         CGM.getModule(), MapTypesArrayInit->getType(),
8699         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8700         MapTypesArrayInit, MaptypesName);
8701     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8702     Info.MapTypesArray = MapTypesArrayGbl;
8703 
8704     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8705       llvm::Value *BPVal = *BasePointers[I];
8706       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8707           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8708           Info.BasePointersArray, 0, I);
8709       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8710           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8711       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8712       CGF.Builder.CreateStore(BPVal, BPAddr);
8713 
8714       if (Info.requiresDevicePointerInfo())
8715         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8716           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8717 
8718       llvm::Value *PVal = Pointers[I];
8719       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8720           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8721           Info.PointersArray, 0, I);
8722       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8723           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8724       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8725       CGF.Builder.CreateStore(PVal, PAddr);
8726 
8727       if (hasRuntimeEvaluationCaptureSize) {
8728         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8729             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8730             Info.SizesArray,
8731             /*Idx0=*/0,
8732             /*Idx1=*/I);
8733         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8734         CGF.Builder.CreateStore(
8735             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8736             SAddr);
8737       }
8738     }
8739   }
8740 }
8741 
8742 /// Emit the arguments to be passed to the runtime library based on the
8743 /// arrays of pointers, sizes and map types.
8744 static void emitOffloadingArraysArgument(
8745     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8746     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8747     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8748   CodeGenModule &CGM = CGF.CGM;
8749   if (Info.NumberOfPtrs) {
8750     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8751         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8752         Info.BasePointersArray,
8753         /*Idx0=*/0, /*Idx1=*/0);
8754     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8755         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8756         Info.PointersArray,
8757         /*Idx0=*/0,
8758         /*Idx1=*/0);
8759     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8760         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8761         /*Idx0=*/0, /*Idx1=*/0);
8762     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8763         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8764         Info.MapTypesArray,
8765         /*Idx0=*/0,
8766         /*Idx1=*/0);
8767   } else {
8768     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8769     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8770     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8771     MapTypesArrayArg =
8772         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8773   }
8774 }
8775 
8776 /// Check for inner distribute directive.
8777 static const OMPExecutableDirective *
8778 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8779   const auto *CS = D.getInnermostCapturedStmt();
8780   const auto *Body =
8781       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8782   const Stmt *ChildStmt =
8783       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8784 
8785   if (const auto *NestedDir =
8786           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8787     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8788     switch (D.getDirectiveKind()) {
8789     case OMPD_target:
8790       if (isOpenMPDistributeDirective(DKind))
8791         return NestedDir;
8792       if (DKind == OMPD_teams) {
8793         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8794             /*IgnoreCaptured=*/true);
8795         if (!Body)
8796           return nullptr;
8797         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8798         if (const auto *NND =
8799                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8800           DKind = NND->getDirectiveKind();
8801           if (isOpenMPDistributeDirective(DKind))
8802             return NND;
8803         }
8804       }
8805       return nullptr;
8806     case OMPD_target_teams:
8807       if (isOpenMPDistributeDirective(DKind))
8808         return NestedDir;
8809       return nullptr;
8810     case OMPD_target_parallel:
8811     case OMPD_target_simd:
8812     case OMPD_target_parallel_for:
8813     case OMPD_target_parallel_for_simd:
8814       return nullptr;
8815     case OMPD_target_teams_distribute:
8816     case OMPD_target_teams_distribute_simd:
8817     case OMPD_target_teams_distribute_parallel_for:
8818     case OMPD_target_teams_distribute_parallel_for_simd:
8819     case OMPD_parallel:
8820     case OMPD_for:
8821     case OMPD_parallel_for:
8822     case OMPD_parallel_master:
8823     case OMPD_parallel_sections:
8824     case OMPD_for_simd:
8825     case OMPD_parallel_for_simd:
8826     case OMPD_cancel:
8827     case OMPD_cancellation_point:
8828     case OMPD_ordered:
8829     case OMPD_threadprivate:
8830     case OMPD_allocate:
8831     case OMPD_task:
8832     case OMPD_simd:
8833     case OMPD_sections:
8834     case OMPD_section:
8835     case OMPD_single:
8836     case OMPD_master:
8837     case OMPD_critical:
8838     case OMPD_taskyield:
8839     case OMPD_barrier:
8840     case OMPD_taskwait:
8841     case OMPD_taskgroup:
8842     case OMPD_atomic:
8843     case OMPD_flush:
8844     case OMPD_depobj:
8845     case OMPD_scan:
8846     case OMPD_teams:
8847     case OMPD_target_data:
8848     case OMPD_target_exit_data:
8849     case OMPD_target_enter_data:
8850     case OMPD_distribute:
8851     case OMPD_distribute_simd:
8852     case OMPD_distribute_parallel_for:
8853     case OMPD_distribute_parallel_for_simd:
8854     case OMPD_teams_distribute:
8855     case OMPD_teams_distribute_simd:
8856     case OMPD_teams_distribute_parallel_for:
8857     case OMPD_teams_distribute_parallel_for_simd:
8858     case OMPD_target_update:
8859     case OMPD_declare_simd:
8860     case OMPD_declare_variant:
8861     case OMPD_begin_declare_variant:
8862     case OMPD_end_declare_variant:
8863     case OMPD_declare_target:
8864     case OMPD_end_declare_target:
8865     case OMPD_declare_reduction:
8866     case OMPD_declare_mapper:
8867     case OMPD_taskloop:
8868     case OMPD_taskloop_simd:
8869     case OMPD_master_taskloop:
8870     case OMPD_master_taskloop_simd:
8871     case OMPD_parallel_master_taskloop:
8872     case OMPD_parallel_master_taskloop_simd:
8873     case OMPD_requires:
8874     case OMPD_unknown:
8875     default:
8876       llvm_unreachable("Unexpected directive.");
8877     }
8878   }
8879 
8880   return nullptr;
8881 }
8882 
8883 /// Emit the user-defined mapper function. The code generation follows the
8884 /// pattern in the example below.
8885 /// \code
8886 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8887 ///                                           void *base, void *begin,
8888 ///                                           int64_t size, int64_t type) {
8889 ///   // Allocate space for an array section first.
8890 ///   if (size > 1 && !maptype.IsDelete)
8891 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8892 ///                                 size*sizeof(Ty), clearToFrom(type));
8893 ///   // Map members.
8894 ///   for (unsigned i = 0; i < size; i++) {
8895 ///     // For each component specified by this mapper:
8896 ///     for (auto c : all_components) {
8897 ///       if (c.hasMapper())
8898 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8899 ///                       c.arg_type);
8900 ///       else
8901 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8902 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8903 ///     }
8904 ///   }
8905 ///   // Delete the array section.
8906 ///   if (size > 1 && maptype.IsDelete)
8907 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8908 ///                                 size*sizeof(Ty), clearToFrom(type));
8909 /// }
8910 /// \endcode
8911 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8912                                             CodeGenFunction *CGF) {
8913   if (UDMMap.count(D) > 0)
8914     return;
8915   ASTContext &C = CGM.getContext();
8916   QualType Ty = D->getType();
8917   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8918   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8919   auto *MapperVarDecl =
8920       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8921   SourceLocation Loc = D->getLocation();
8922   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8923 
8924   // Prepare mapper function arguments and attributes.
8925   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8926                               C.VoidPtrTy, ImplicitParamDecl::Other);
8927   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8928                             ImplicitParamDecl::Other);
8929   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8930                              C.VoidPtrTy, ImplicitParamDecl::Other);
8931   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8932                             ImplicitParamDecl::Other);
8933   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8934                             ImplicitParamDecl::Other);
8935   FunctionArgList Args;
8936   Args.push_back(&HandleArg);
8937   Args.push_back(&BaseArg);
8938   Args.push_back(&BeginArg);
8939   Args.push_back(&SizeArg);
8940   Args.push_back(&TypeArg);
8941   const CGFunctionInfo &FnInfo =
8942       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8943   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8944   SmallString<64> TyStr;
8945   llvm::raw_svector_ostream Out(TyStr);
8946   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8947   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8948   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8949                                     Name, &CGM.getModule());
8950   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8951   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8952   // Start the mapper function code generation.
8953   CodeGenFunction MapperCGF(CGM);
8954   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8955   // Compute the starting and end addreses of array elements.
8956   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8957       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8958       C.getPointerType(Int64Ty), Loc);
8959   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8960       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8961       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8962   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8963   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8964       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8965       C.getPointerType(Int64Ty), Loc);
8966   // Prepare common arguments for array initiation and deletion.
8967   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8968       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8969       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8970   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8971       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8972       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8973   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8974       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8975       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8976 
8977   // Emit array initiation if this is an array section and \p MapType indicates
8978   // that memory allocation is required.
8979   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8980   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8981                              ElementSize, HeadBB, /*IsInit=*/true);
8982 
8983   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8984 
8985   // Emit the loop header block.
8986   MapperCGF.EmitBlock(HeadBB);
8987   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8988   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8989   // Evaluate whether the initial condition is satisfied.
8990   llvm::Value *IsEmpty =
8991       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8992   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8993   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8994 
8995   // Emit the loop body block.
8996   MapperCGF.EmitBlock(BodyBB);
8997   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8998       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8999   PtrPHI->addIncoming(PtrBegin, EntryBB);
9000   Address PtrCurrent =
9001       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9002                           .getAlignment()
9003                           .alignmentOfArrayElement(ElementSize));
9004   // Privatize the declared variable of mapper to be the current array element.
9005   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9006   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9007     return MapperCGF
9008         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9009         .getAddress(MapperCGF);
9010   });
9011   (void)Scope.Privatize();
9012 
9013   // Get map clause information. Fill up the arrays with all mapped variables.
9014   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9015   MappableExprsHandler::MapValuesArrayTy Pointers;
9016   MappableExprsHandler::MapValuesArrayTy Sizes;
9017   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9018   MappableExprsHandler MEHandler(*D, MapperCGF);
9019   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9020 
9021   // Call the runtime API __tgt_mapper_num_components to get the number of
9022   // pre-existing components.
9023   llvm::Value *OffloadingArgs[] = {Handle};
9024   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9025       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9026                                             OMPRTL___tgt_mapper_num_components),
9027       OffloadingArgs);
9028   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9029       PreviousSize,
9030       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9031 
9032   // Fill up the runtime mapper handle for all components.
9033   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9034     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9035         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9036     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9037         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9038     llvm::Value *CurSizeArg = Sizes[I];
9039 
9040     // Extract the MEMBER_OF field from the map type.
9041     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9042     MapperCGF.EmitBlock(MemberBB);
9043     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9044     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9045         OriMapType,
9046         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9047     llvm::BasicBlock *MemberCombineBB =
9048         MapperCGF.createBasicBlock("omp.member.combine");
9049     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9050     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9051     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9052     // Add the number of pre-existing components to the MEMBER_OF field if it
9053     // is valid.
9054     MapperCGF.EmitBlock(MemberCombineBB);
9055     llvm::Value *CombinedMember =
9056         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9057     // Do nothing if it is not a member of previous components.
9058     MapperCGF.EmitBlock(TypeBB);
9059     llvm::PHINode *MemberMapType =
9060         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9061     MemberMapType->addIncoming(OriMapType, MemberBB);
9062     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9063 
9064     // Combine the map type inherited from user-defined mapper with that
9065     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9066     // bits of the \a MapType, which is the input argument of the mapper
9067     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9068     // bits of MemberMapType.
9069     // [OpenMP 5.0], 1.2.6. map-type decay.
9070     //        | alloc |  to   | from  | tofrom | release | delete
9071     // ----------------------------------------------------------
9072     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9073     // to     | alloc |  to   | alloc |   to   | release | delete
9074     // from   | alloc | alloc | from  |  from  | release | delete
9075     // tofrom | alloc |  to   | from  | tofrom | release | delete
9076     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9077         MapType,
9078         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9079                                    MappableExprsHandler::OMP_MAP_FROM));
9080     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9081     llvm::BasicBlock *AllocElseBB =
9082         MapperCGF.createBasicBlock("omp.type.alloc.else");
9083     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9084     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9085     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9086     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9087     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9088     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9089     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9090     MapperCGF.EmitBlock(AllocBB);
9091     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9092         MemberMapType,
9093         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9094                                      MappableExprsHandler::OMP_MAP_FROM)));
9095     MapperCGF.Builder.CreateBr(EndBB);
9096     MapperCGF.EmitBlock(AllocElseBB);
9097     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9098         LeftToFrom,
9099         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9100     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9101     // In case of to, clear OMP_MAP_FROM.
9102     MapperCGF.EmitBlock(ToBB);
9103     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9104         MemberMapType,
9105         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9106     MapperCGF.Builder.CreateBr(EndBB);
9107     MapperCGF.EmitBlock(ToElseBB);
9108     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9109         LeftToFrom,
9110         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9111     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9112     // In case of from, clear OMP_MAP_TO.
9113     MapperCGF.EmitBlock(FromBB);
9114     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9115         MemberMapType,
9116         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9117     // In case of tofrom, do nothing.
9118     MapperCGF.EmitBlock(EndBB);
9119     llvm::PHINode *CurMapType =
9120         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9121     CurMapType->addIncoming(AllocMapType, AllocBB);
9122     CurMapType->addIncoming(ToMapType, ToBB);
9123     CurMapType->addIncoming(FromMapType, FromBB);
9124     CurMapType->addIncoming(MemberMapType, ToElseBB);
9125 
9126     // TODO: call the corresponding mapper function if a user-defined mapper is
9127     // associated with this map clause.
9128     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9129     // data structure.
9130     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9131                                      CurSizeArg, CurMapType};
9132     MapperCGF.EmitRuntimeCall(
9133         OMPBuilder.getOrCreateRuntimeFunction(
9134             CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9135         OffloadingArgs);
9136   }
9137 
9138   // Update the pointer to point to the next element that needs to be mapped,
9139   // and check whether we have mapped all elements.
9140   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9141       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9142   PtrPHI->addIncoming(PtrNext, BodyBB);
9143   llvm::Value *IsDone =
9144       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9145   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9146   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9147 
9148   MapperCGF.EmitBlock(ExitBB);
9149   // Emit array deletion if this is an array section and \p MapType indicates
9150   // that deletion is required.
9151   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9152                              ElementSize, DoneBB, /*IsInit=*/false);
9153 
9154   // Emit the function exit block.
9155   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9156   MapperCGF.FinishFunction();
9157   UDMMap.try_emplace(D, Fn);
9158   if (CGF) {
9159     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9160     Decls.second.push_back(D);
9161   }
9162 }
9163 
9164 /// Emit the array initialization or deletion portion for user-defined mapper
9165 /// code generation. First, it evaluates whether an array section is mapped and
9166 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9167 /// true, and \a MapType indicates to not delete this array, array
9168 /// initialization code is generated. If \a IsInit is false, and \a MapType
9169 /// indicates to not this array, array deletion code is generated.
9170 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9171     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9172     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9173     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9174   StringRef Prefix = IsInit ? ".init" : ".del";
9175 
9176   // Evaluate if this is an array section.
9177   llvm::BasicBlock *IsDeleteBB =
9178       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9179   llvm::BasicBlock *BodyBB =
9180       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9181   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9182       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9183   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9184 
9185   // Evaluate if we are going to delete this section.
9186   MapperCGF.EmitBlock(IsDeleteBB);
9187   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9188       MapType,
9189       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9190   llvm::Value *DeleteCond;
9191   if (IsInit) {
9192     DeleteCond = MapperCGF.Builder.CreateIsNull(
9193         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9194   } else {
9195     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9196         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9197   }
9198   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9199 
9200   MapperCGF.EmitBlock(BodyBB);
9201   // Get the array size by multiplying element size and element number (i.e., \p
9202   // Size).
9203   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9204       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9205   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9206   // memory allocation/deletion purpose only.
9207   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9208       MapType,
9209       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9210                                    MappableExprsHandler::OMP_MAP_FROM)));
9211   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9212   // data structure.
9213   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9214   MapperCGF.EmitRuntimeCall(
9215       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9216                                             OMPRTL___tgt_push_mapper_component),
9217       OffloadingArgs);
9218 }
9219 
9220 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9221     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9222     llvm::Value *DeviceID,
9223     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9224                                      const OMPLoopDirective &D)>
9225         SizeEmitter) {
9226   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9227   const OMPExecutableDirective *TD = &D;
9228   // Get nested teams distribute kind directive, if any.
9229   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9230     TD = getNestedDistributeDirective(CGM.getContext(), D);
9231   if (!TD)
9232     return;
9233   const auto *LD = cast<OMPLoopDirective>(TD);
9234   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9235                                                      PrePostActionTy &) {
9236     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9237       llvm::Value *Args[] = {DeviceID, NumIterations};
9238       CGF.EmitRuntimeCall(
9239           OMPBuilder.getOrCreateRuntimeFunction(
9240               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9241           Args);
9242     }
9243   };
9244   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9245 }
9246 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// The captured variables of the region are generated first. If
/// \p OutlinedFnID is non-null, the offloading arrays are filled and a call to
/// one of the __tgt_target* runtime entry points is emitted, followed by a
/// call to the host version \p OutlinedFn on the failure path; when \p IfCond
/// is present, emitIfClause selects between that path and a plain host call.
/// If \p OutlinedFnID is null only the host call to \p OutlinedFn is emitted.
/// Directives with a 'depend' clause are emitted through
/// EmitOMPTargetTaskBasedDirective instead of emitInlinedDirective.
///
/// \param CGF Code generation state; nothing is emitted without an insert
/// point.
/// \param D Target directive being emitted.
/// \param OutlinedFn Host version of the target region (must be non-null).
/// \param OutlinedFnID Identifier of the target region for the runtime, or
/// null when offloading is not needed.
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Device clause expression together with its modifier.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall.
9247 void CGOpenMPRuntime::emitTargetCall(
9248     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9249     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9250     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9251     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9252                                      const OMPLoopDirective &D)>
9253         SizeEmitter) {
9254   if (!CGF.HaveInsertPoint())
9255     return;
9256 
9257   assert(OutlinedFn && "Invalid outlined function!");
9258 
       // A 'depend' clause routes emission through
       // EmitOMPTargetTaskBasedDirective further below.
9259   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9260   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9261   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9262   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9263                                             PrePostActionTy &) {
9264     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9265   };
9266   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9267 
       // InputInfo and MapTypesArray are filled in by TargetThenGen and read
       // by ThenGen; both lambdas capture them by reference.
9268   CodeGenFunction::OMPTargetDataInfo InputInfo;
9269   llvm::Value *MapTypesArray = nullptr;
9270   // Fill up the pointer arrays and transfer execution to the device.
9271   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9272                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9273                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9274     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9275       // Reverse offloading is not supported, so just execute on the host.
9276       if (RequiresOuterTask) {
9277         CapturedVars.clear();
9278         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9279       }
9280       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9281       return;
9282     }
9283 
9284     // On top of the arrays that were filled up, the target offloading call
9285     // takes as arguments the device id as well as the host pointer. The host
9286     // pointer is used by the runtime library to identify the current target
9287     // region, so it only has to be unique and not necessarily point to
9288     // anything. It could be the pointer to the outlined function that
9289     // implements the target region, but we aren't using that so that the
9290     // compiler doesn't need to keep that, and could therefore inline the host
9291     // function if proven worthwhile during optimization.
9292 
9293     // From this point on, we need to have an ID of the target region defined.
9294     assert(OutlinedFnID && "Invalid outlined function ID!");
9295 
9296     // Emit device ID if any.
9297     llvm::Value *DeviceID;
9298     if (Device.getPointer()) {
9299       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9300               Device.getInt() == OMPC_DEVICE_device_num) &&
9301              "Expected device_num modifier.");
9302       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9303       DeviceID =
9304           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9305     } else {
9306       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9307     }
9308 
9309     // Emit the number of elements in the offloading arrays.
9310     llvm::Value *PointerNum =
9311         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9312 
9313     // Return value of the runtime offloading call.
9314     llvm::Value *Return;
9315 
9316     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9317     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9318 
9319     // Emit tripcount for the target loop-based directive.
9320     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9321 
9322     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9323     // The target region is an outlined function launched by the runtime
9324     // via calls __tgt_target() or __tgt_target_teams().
9325     //
9326     // __tgt_target() launches a target region with one team and one thread,
9327     // executing a serial region.  This master thread may in turn launch
9328     // more threads within its team upon encountering a parallel region,
9329     // however, no additional teams can be launched on the device.
9330     //
9331     // __tgt_target_teams() launches a target region with one or more teams,
9332     // each with one or more threads.  This call is required for target
9333     // constructs such as:
9334     //  'target teams'
9335     //  'target' / 'teams'
9336     //  'target teams distribute parallel for'
9337     //  'target parallel'
9338     // and so on.
9339     //
9340     // Note that on the host and CPU targets, the runtime implementation of
9341     // these calls simply call the outlined function without forking threads.
9342     // The outlined functions themselves have runtime calls to
9343     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9344     // the compiler in emitTeamsCall() and emitParallelCall().
9345     //
9346     // In contrast, on the NVPTX target, the implementation of
9347     // __tgt_target_teams() launches a GPU kernel with the requested number
9348     // of teams and threads so no additional calls to the runtime are required.
9349     if (NumTeams) {
9350       // If we have NumTeams defined this means that we have an enclosed teams
9351       // region. Therefore we also expect to have NumThreads defined. These two
9352       // values should be defined in the presence of a teams directive,
9353       // regardless of having any clauses associated. If the user is using teams
9354       // but no clauses, these two values will be the default that should be
9355       // passed to the runtime library - a 32-bit integer with the value zero.
9356       assert(NumThreads && "Thread limit expression should be available along "
9357                            "with number of teams.");
9358       llvm::Value *OffloadingArgs[] = {DeviceID,
9359                                        OutlinedFnID,
9360                                        PointerNum,
9361                                        InputInfo.BasePointersArray.getPointer(),
9362                                        InputInfo.PointersArray.getPointer(),
9363                                        InputInfo.SizesArray.getPointer(),
9364                                        MapTypesArray,
9365                                        NumTeams,
9366                                        NumThreads};
9367       Return = CGF.EmitRuntimeCall(
9368           OMPBuilder.getOrCreateRuntimeFunction(
9369               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
9370                                          : OMPRTL___tgt_target_teams),
9371           OffloadingArgs);
9372     } else {
9373       llvm::Value *OffloadingArgs[] = {DeviceID,
9374                                        OutlinedFnID,
9375                                        PointerNum,
9376                                        InputInfo.BasePointersArray.getPointer(),
9377                                        InputInfo.PointersArray.getPointer(),
9378                                        InputInfo.SizesArray.getPointer(),
9379                                        MapTypesArray};
9380       Return = CGF.EmitRuntimeCall(
9381           OMPBuilder.getOrCreateRuntimeFunction(
9382               CGM.getModule(),
9383               HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
9384           OffloadingArgs);
9385     }
9386 
9387     // Check the error code and execute the host version if required.
         // A non-null (non-zero) return value branches to the host fallback.
9388     llvm::BasicBlock *OffloadFailedBlock =
9389         CGF.createBasicBlock("omp_offload.failed");
9390     llvm::BasicBlock *OffloadContBlock =
9391         CGF.createBasicBlock("omp_offload.cont");
9392     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9393     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9394 
9395     CGF.EmitBlock(OffloadFailedBlock);
9396     if (RequiresOuterTask) {
9397       CapturedVars.clear();
9398       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9399     }
9400     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9401     CGF.EmitBranch(OffloadContBlock);
9402 
9403     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9404   };
9405 
9406   // Notify that the host version must be executed.
9407   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9408                     RequiresOuterTask](CodeGenFunction &CGF,
9409                                        PrePostActionTy &) {
         // On the task-based path the captured variables are regenerated with
         // the CodeGenFunction that emits the host call.
9410     if (RequiresOuterTask) {
9411       CapturedVars.clear();
9412       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9413     }
9414     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9415   };
9416 
       // Collects the map information of every capture, materializes the
       // offloading arrays and then emits ThenGen.
9417   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9418                           &CapturedVars, RequiresOuterTask,
9419                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9420     // Fill up the arrays with all the captured variables.
9421     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9422     MappableExprsHandler::MapValuesArrayTy Pointers;
9423     MappableExprsHandler::MapValuesArrayTy Sizes;
9424     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9425 
9426     // Get mappable expression information.
9427     MappableExprsHandler MEHandler(D, CGF);
9428     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9429 
         // The captures, the fields of the captured record and the generated
         // captured values are walked in lockstep.
9430     auto RI = CS.getCapturedRecordDecl()->field_begin();
9431     auto CV = CapturedVars.begin();
9432     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9433                                               CE = CS.capture_end();
9434          CI != CE; ++CI, ++RI, ++CV) {
9435       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9436       MappableExprsHandler::MapValuesArrayTy CurPointers;
9437       MappableExprsHandler::MapValuesArrayTy CurSizes;
9438       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9439       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9440 
9441       // VLA sizes are passed to the outlined region by copy and do not have map
9442       // information associated.
9443       if (CI->capturesVariableArrayType()) {
9444         CurBasePointers.push_back(*CV);
9445         CurPointers.push_back(*CV);
9446         CurSizes.push_back(CGF.Builder.CreateIntCast(
9447             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9448         // Copy to the device as an argument. No need to retrieve it.
9449         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9450                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9451                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9452       } else {
9453         // If we have any information in the map clause, we use it, otherwise we
9454         // just do a default mapping.
9455         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9456                                          CurSizes, CurMapTypes, PartialStruct);
9457         if (CurBasePointers.empty())
9458           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9459                                            CurPointers, CurSizes, CurMapTypes);
9460         // Generate correct mapping for variables captured by reference in
9461         // lambdas.
9462         if (CI->capturesVariable())
9463           MEHandler.generateInfoForLambdaCaptures(
9464               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9465               CurMapTypes, LambdaPointers);
9466       }
9467       // We expect to have at least an element of information for this capture.
9468       assert(!CurBasePointers.empty() &&
9469              "Non-existing map pointer for capture!");
9470       assert(CurBasePointers.size() == CurPointers.size() &&
9471              CurBasePointers.size() == CurSizes.size() &&
9472              CurBasePointers.size() == CurMapTypes.size() &&
9473              "Inconsistent map information sizes!");
9474 
9475       // If there is an entry in PartialStruct it means we have a struct with
9476       // individual members mapped. Emit an extra combined entry.
9477       if (PartialStruct.Base.isValid())
9478         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9479                                     CurMapTypes, PartialStruct);
9480 
9481       // We need to append the results of this capture to what we already have.
9482       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9483       Pointers.append(CurPointers.begin(), CurPointers.end());
9484       Sizes.append(CurSizes.begin(), CurSizes.end());
9485       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9486     }
9487     // Adjust MEMBER_OF flags for the lambdas captures.
9488     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9489                                               Pointers, MapTypes);
9490     // Map other list items in the map clause which are not captured variables
9491     // but "declare target link" global variables.
9492     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9493                                                MapTypes);
9494 
9495     TargetDataInfo Info;
9496     // Fill up the arrays and create the arguments.
9497     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9498     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9499                                  Info.PointersArray, Info.SizesArray,
9500                                  Info.MapTypesArray, Info);
         // Publish the array arguments to ThenGen through the variables it
         // captured by reference.
9501     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9502     InputInfo.BasePointersArray =
9503         Address(Info.BasePointersArray, CGM.getPointerAlign());
9504     InputInfo.PointersArray =
9505         Address(Info.PointersArray, CGM.getPointerAlign());
9506     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9507     MapTypesArray = Info.MapTypesArray;
9508     if (RequiresOuterTask)
9509       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9510     else
9511       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9512   };
9513 
       // Host-only counterpart of TargetThenGen: emits ElseGen without any
       // offloading arrays.
9514   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9515                              CodeGenFunction &CGF, PrePostActionTy &) {
9516     if (RequiresOuterTask) {
9517       CodeGenFunction::OMPTargetDataInfo InputInfo;
9518       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9519     } else {
9520       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9521     }
9522   };
9523 
9524   // If we have a target function ID it means that we need to support
9525   // offloading, otherwise, just execute on the host. We need to execute on host
9526   // regardless of the conditional in the if clause if, e.g., the user does not
9527   // specify target triples.
9528   if (OutlinedFnID) {
9529     if (IfCond) {
9530       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9531     } else {
9532       RegionCodeGenTy ThenRCG(TargetThenGen);
9533       ThenRCG(CGF);
9534     }
9535   } else {
9536     RegionCodeGenTy ElseRCG(TargetElseGen);
9537     ElseRCG(CGF);
9538   }
9539 }
9540 
9541 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9542                                                     StringRef ParentName) {
9543   if (!S)
9544     return;
9545 
9546   // Codegen OMP target directives that offload compute to the device.
9547   bool RequiresDeviceCodegen =
9548       isa<OMPExecutableDirective>(S) &&
9549       isOpenMPTargetExecutionDirective(
9550           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9551 
9552   if (RequiresDeviceCodegen) {
9553     const auto &E = *cast<OMPExecutableDirective>(S);
9554     unsigned DeviceID;
9555     unsigned FileID;
9556     unsigned Line;
9557     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9558                              FileID, Line);
9559 
9560     // Is this a target region that should not be emitted as an entry point? If
9561     // so just signal we are done with this target region.
9562     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9563                                                             ParentName, Line))
9564       return;
9565 
9566     switch (E.getDirectiveKind()) {
9567     case OMPD_target:
9568       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9569                                                    cast<OMPTargetDirective>(E));
9570       break;
9571     case OMPD_target_parallel:
9572       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9573           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9574       break;
9575     case OMPD_target_teams:
9576       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9577           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9578       break;
9579     case OMPD_target_teams_distribute:
9580       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9581           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9582       break;
9583     case OMPD_target_teams_distribute_simd:
9584       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9585           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9586       break;
9587     case OMPD_target_parallel_for:
9588       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9589           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9590       break;
9591     case OMPD_target_parallel_for_simd:
9592       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9593           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9594       break;
9595     case OMPD_target_simd:
9596       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9597           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9598       break;
9599     case OMPD_target_teams_distribute_parallel_for:
9600       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9601           CGM, ParentName,
9602           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9603       break;
9604     case OMPD_target_teams_distribute_parallel_for_simd:
9605       CodeGenFunction::
9606           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9607               CGM, ParentName,
9608               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9609       break;
9610     case OMPD_parallel:
9611     case OMPD_for:
9612     case OMPD_parallel_for:
9613     case OMPD_parallel_master:
9614     case OMPD_parallel_sections:
9615     case OMPD_for_simd:
9616     case OMPD_parallel_for_simd:
9617     case OMPD_cancel:
9618     case OMPD_cancellation_point:
9619     case OMPD_ordered:
9620     case OMPD_threadprivate:
9621     case OMPD_allocate:
9622     case OMPD_task:
9623     case OMPD_simd:
9624     case OMPD_sections:
9625     case OMPD_section:
9626     case OMPD_single:
9627     case OMPD_master:
9628     case OMPD_critical:
9629     case OMPD_taskyield:
9630     case OMPD_barrier:
9631     case OMPD_taskwait:
9632     case OMPD_taskgroup:
9633     case OMPD_atomic:
9634     case OMPD_flush:
9635     case OMPD_depobj:
9636     case OMPD_scan:
9637     case OMPD_teams:
9638     case OMPD_target_data:
9639     case OMPD_target_exit_data:
9640     case OMPD_target_enter_data:
9641     case OMPD_distribute:
9642     case OMPD_distribute_simd:
9643     case OMPD_distribute_parallel_for:
9644     case OMPD_distribute_parallel_for_simd:
9645     case OMPD_teams_distribute:
9646     case OMPD_teams_distribute_simd:
9647     case OMPD_teams_distribute_parallel_for:
9648     case OMPD_teams_distribute_parallel_for_simd:
9649     case OMPD_target_update:
9650     case OMPD_declare_simd:
9651     case OMPD_declare_variant:
9652     case OMPD_begin_declare_variant:
9653     case OMPD_end_declare_variant:
9654     case OMPD_declare_target:
9655     case OMPD_end_declare_target:
9656     case OMPD_declare_reduction:
9657     case OMPD_declare_mapper:
9658     case OMPD_taskloop:
9659     case OMPD_taskloop_simd:
9660     case OMPD_master_taskloop:
9661     case OMPD_master_taskloop_simd:
9662     case OMPD_parallel_master_taskloop:
9663     case OMPD_parallel_master_taskloop_simd:
9664     case OMPD_requires:
9665     case OMPD_unknown:
9666     default:
9667       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9668     }
9669     return;
9670   }
9671 
9672   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9673     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9674       return;
9675 
9676     scanForTargetRegionsFunctions(
9677         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9678     return;
9679   }
9680 
9681   // If this is a lambda function, look into its body.
9682   if (const auto *L = dyn_cast<LambdaExpr>(S))
9683     S = L->getBody();
9684 
9685   // Keep looking for target regions recursively.
9686   for (const Stmt *II : S->children())
9687     scanForTargetRegionsFunctions(II, ParentName);
9688 }
9689 
9690 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9691   // If emitting code for the host, we do not process FD here. Instead we do
9692   // the normal code generation.
9693   if (!CGM.getLangOpts().OpenMPIsDevice) {
9694     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9695       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9696           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9697       // Do not emit device_type(nohost) functions for the host.
9698       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9699         return true;
9700     }
9701     return false;
9702   }
9703 
9704   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9705   // Try to detect target regions in the function.
9706   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9707     StringRef Name = CGM.getMangledName(GD);
9708     scanForTargetRegionsFunctions(FD->getBody(), Name);
9709     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9710         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9711     // Do not emit device_type(nohost) functions for the host.
9712     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9713       return true;
9714   }
9715 
9716   // Do not to emit function if it is not marked as declare target.
9717   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9718          AlreadyEmittedTargetDecls.count(VD) == 0;
9719 }
9720 
9721 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9722   if (!CGM.getLangOpts().OpenMPIsDevice)
9723     return false;
9724 
9725   // Check if there are Ctors/Dtors in this declaration and look for target
9726   // regions in it. We use the complete variant to produce the kernel name
9727   // mangling.
9728   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9729   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9730     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9731       StringRef ParentName =
9732           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9733       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9734     }
9735     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9736       StringRef ParentName =
9737           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9738       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9739     }
9740   }
9741 
9742   // Do not to emit variable if it is not marked as declare target.
9743   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9744       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9745           cast<VarDecl>(GD.getDecl()));
9746   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9747       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9748        HasRequiresUnifiedSharedMemory)) {
9749     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9750     return true;
9751   }
9752   return false;
9753 }
9754 
9755 llvm::Constant *
9756 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9757                                                 const VarDecl *VD) {
9758   assert(VD->getType().isConstant(CGM.getContext()) &&
9759          "Expected constant variable.");
9760   StringRef VarName;
9761   llvm::Constant *Addr;
9762   llvm::GlobalValue::LinkageTypes Linkage;
9763   QualType Ty = VD->getType();
9764   SmallString<128> Buffer;
9765   {
9766     unsigned DeviceID;
9767     unsigned FileID;
9768     unsigned Line;
9769     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9770                              FileID, Line);
9771     llvm::raw_svector_ostream OS(Buffer);
9772     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9773        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9774     VarName = OS.str();
9775   }
9776   Linkage = llvm::GlobalValue::InternalLinkage;
9777   Addr =
9778       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9779                                   getDefaultFirstprivateAddressSpace());
9780   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9781   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9782   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9783   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9784       VarName, Addr, VarSize,
9785       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9786   return Addr;
9787 }
9788 
9789 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9790                                                    llvm::Constant *Addr) {
9791   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9792       !CGM.getLangOpts().OpenMPIsDevice)
9793     return;
9794   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9795       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9796   if (!Res) {
9797     if (CGM.getLangOpts().OpenMPIsDevice) {
9798       // Register non-target variables being emitted in device code (debug info
9799       // may cause this).
9800       StringRef VarName = CGM.getMangledName(VD);
9801       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9802     }
9803     return;
9804   }
9805   // Register declare target variables.
9806   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9807   StringRef VarName;
9808   CharUnits VarSize;
9809   llvm::GlobalValue::LinkageTypes Linkage;
9810 
9811   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9812       !HasRequiresUnifiedSharedMemory) {
9813     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9814     VarName = CGM.getMangledName(VD);
9815     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9816       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9817       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9818     } else {
9819       VarSize = CharUnits::Zero();
9820     }
9821     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9822     // Temp solution to prevent optimizations of the internal variables.
9823     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9824       std::string RefName = getName({VarName, "ref"});
9825       if (!CGM.GetGlobalValue(RefName)) {
9826         llvm::Constant *AddrRef =
9827             getOrCreateInternalVariable(Addr->getType(), RefName);
9828         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9829         GVAddrRef->setConstant(/*Val=*/true);
9830         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9831         GVAddrRef->setInitializer(Addr);
9832         CGM.addCompilerUsedGlobal(GVAddrRef);
9833       }
9834     }
9835   } else {
9836     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9837             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9838              HasRequiresUnifiedSharedMemory)) &&
9839            "Declare target attribute must link or to with unified memory.");
9840     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9841       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9842     else
9843       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9844 
9845     if (CGM.getLangOpts().OpenMPIsDevice) {
9846       VarName = Addr->getName();
9847       Addr = nullptr;
9848     } else {
9849       VarName = getAddrOfDeclareTargetVar(VD).getName();
9850       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9851     }
9852     VarSize = CGM.getPointerSize();
9853     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9854   }
9855 
9856   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9857       VarName, Addr, VarSize, Flags, Linkage);
9858 }
9859 
9860 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9861   if (isa<FunctionDecl>(GD.getDecl()) ||
9862       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9863     return emitTargetFunctions(GD);
9864 
9865   return emitTargetGlobalVariable(GD);
9866 }
9867 
9868 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9869   for (const VarDecl *VD : DeferredGlobalVariables) {
9870     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9871         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9872     if (!Res)
9873       continue;
9874     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9875         !HasRequiresUnifiedSharedMemory) {
9876       CGM.EmitGlobal(VD);
9877     } else {
9878       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9879               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9880                HasRequiresUnifiedSharedMemory)) &&
9881              "Expected link clause or to clause with unified memory.");
9882       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9883     }
9884   }
9885 }
9886 
9887 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9888     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9889   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9890          " Expected target-based directive.");
9891 }
9892 
9893 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9894   for (const OMPClause *Clause : D->clauselists()) {
9895     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9896       HasRequiresUnifiedSharedMemory = true;
9897     } else if (const auto *AC =
9898                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9899       switch (AC->getAtomicDefaultMemOrderKind()) {
9900       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9901         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9902         break;
9903       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9904         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9905         break;
9906       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9907         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9908         break;
9909       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9910         break;
9911       }
9912     }
9913   }
9914 }
9915 
9916 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
9917   return RequiresAtomicOrdering;
9918 }
9919 
9920 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9921                                                        LangAS &AS) {
9922   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9923     return false;
9924   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9925   switch(A->getAllocatorType()) {
9926   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9927   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9928   // Not supported, fallback to the default mem space.
9929   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9930   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9931   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9932   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9933   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9934   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9935   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9936     AS = LangAS::Default;
9937     return true;
9938   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9939     llvm_unreachable("Expected predefined allocator for the variables with the "
9940                      "static storage.");
9941   }
9942   return false;
9943 }
9944 
9945 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9946   return HasRequiresUnifiedSharedMemory;
9947 }
9948 
9949 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9950     CodeGenModule &CGM)
9951     : CGM(CGM) {
9952   if (CGM.getLangOpts().OpenMPIsDevice) {
9953     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9954     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9955   }
9956 }
9957 
9958 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9959   if (CGM.getLangOpts().OpenMPIsDevice)
9960     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9961 }
9962 
9963 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9964   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9965     return true;
9966 
9967   const auto *D = cast<FunctionDecl>(GD.getDecl());
9968   // Do not to emit function if it is marked as declare target as it was already
9969   // emitted.
9970   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9971     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9972       if (auto *F = dyn_cast_or_null<llvm::Function>(
9973               CGM.GetGlobalValue(CGM.getMangledName(GD))))
9974         return !F->isDeclaration();
9975       return false;
9976     }
9977     return true;
9978   }
9979 
9980   return !AlreadyEmittedTargetDecls.insert(D).second;
9981 }
9982 
9983 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9984   // If we don't have entries or if we are emitting code for the device, we
9985   // don't need to do anything.
9986   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9987       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9988       (OffloadEntriesInfoManager.empty() &&
9989        !HasEmittedDeclareTargetRegion &&
9990        !HasEmittedTargetRegion))
9991     return nullptr;
9992 
9993   // Create and register the function that handles the requires directives.
9994   ASTContext &C = CGM.getContext();
9995 
9996   llvm::Function *RequiresRegFn;
9997   {
9998     CodeGenFunction CGF(CGM);
9999     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10000     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10001     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10002     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10003     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10004     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10005     // TODO: check for other requires clauses.
10006     // The requires directive takes effect only when a target region is
10007     // present in the compilation unit. Otherwise it is ignored and not
10008     // passed to the runtime. This avoids the runtime from throwing an error
10009     // for mismatching requires clauses across compilation units that don't
10010     // contain at least 1 target region.
10011     assert((HasEmittedTargetRegion ||
10012             HasEmittedDeclareTargetRegion ||
10013             !OffloadEntriesInfoManager.empty()) &&
10014            "Target or declare target region expected.");
10015     if (HasRequiresUnifiedSharedMemory)
10016       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10017     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10018                             CGM.getModule(), OMPRTL___tgt_register_requires),
10019                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10020     CGF.FinishFunction();
10021   }
10022   return RequiresRegFn;
10023 }
10024 
10025 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10026                                     const OMPExecutableDirective &D,
10027                                     SourceLocation Loc,
10028                                     llvm::Function *OutlinedFn,
10029                                     ArrayRef<llvm::Value *> CapturedVars) {
10030   if (!CGF.HaveInsertPoint())
10031     return;
10032 
10033   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10034   CodeGenFunction::RunCleanupsScope Scope(CGF);
10035 
10036   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10037   llvm::Value *Args[] = {
10038       RTLoc,
10039       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10040       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10041   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10042   RealArgs.append(std::begin(Args), std::end(Args));
10043   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10044 
10045   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10046       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10047   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10048 }
10049 
10050 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10051                                          const Expr *NumTeams,
10052                                          const Expr *ThreadLimit,
10053                                          SourceLocation Loc) {
10054   if (!CGF.HaveInsertPoint())
10055     return;
10056 
10057   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10058 
10059   llvm::Value *NumTeamsVal =
10060       NumTeams
10061           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10062                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10063           : CGF.Builder.getInt32(0);
10064 
10065   llvm::Value *ThreadLimitVal =
10066       ThreadLimit
10067           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10068                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10069           : CGF.Builder.getInt32(0);
10070 
10071   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10072   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10073                                      ThreadLimitVal};
10074   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10075                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10076                       PushNumTeamsArgs);
10077 }
10078 
10079 void CGOpenMPRuntime::emitTargetDataCalls(
10080     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10081     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10082   if (!CGF.HaveInsertPoint())
10083     return;
10084 
10085   // Action used to replace the default codegen action and turn privatization
10086   // off.
10087   PrePostActionTy NoPrivAction;
10088 
10089   // Generate the code for the opening of the data environment. Capture all the
10090   // arguments of the runtime call by reference because they are used in the
10091   // closing of the region.
10092   auto &&BeginThenGen = [this, &D, Device, &Info,
10093                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10094     // Fill up the arrays with all the mapped variables.
10095     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10096     MappableExprsHandler::MapValuesArrayTy Pointers;
10097     MappableExprsHandler::MapValuesArrayTy Sizes;
10098     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10099 
10100     // Get map clause information.
10101     MappableExprsHandler MCHandler(D, CGF);
10102     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10103 
10104     // Fill up the arrays and create the arguments.
10105     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10106 
10107     llvm::Value *BasePointersArrayArg = nullptr;
10108     llvm::Value *PointersArrayArg = nullptr;
10109     llvm::Value *SizesArrayArg = nullptr;
10110     llvm::Value *MapTypesArrayArg = nullptr;
10111     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10112                                  SizesArrayArg, MapTypesArrayArg, Info);
10113 
10114     // Emit device ID if any.
10115     llvm::Value *DeviceID = nullptr;
10116     if (Device) {
10117       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10118                                            CGF.Int64Ty, /*isSigned=*/true);
10119     } else {
10120       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10121     }
10122 
10123     // Emit the number of elements in the offloading arrays.
10124     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10125 
10126     llvm::Value *OffloadingArgs[] = {
10127         DeviceID,         PointerNum,    BasePointersArrayArg,
10128         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10129     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10130                             CGM.getModule(), OMPRTL___tgt_target_data_begin),
10131                         OffloadingArgs);
10132 
10133     // If device pointer privatization is required, emit the body of the region
10134     // here. It will have to be duplicated: with and without privatization.
10135     if (!Info.CaptureDeviceAddrMap.empty())
10136       CodeGen(CGF);
10137   };
10138 
10139   // Generate code for the closing of the data region.
10140   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10141                                             PrePostActionTy &) {
10142     assert(Info.isValid() && "Invalid data environment closing arguments.");
10143 
10144     llvm::Value *BasePointersArrayArg = nullptr;
10145     llvm::Value *PointersArrayArg = nullptr;
10146     llvm::Value *SizesArrayArg = nullptr;
10147     llvm::Value *MapTypesArrayArg = nullptr;
10148     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10149                                  SizesArrayArg, MapTypesArrayArg, Info);
10150 
10151     // Emit device ID if any.
10152     llvm::Value *DeviceID = nullptr;
10153     if (Device) {
10154       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10155                                            CGF.Int64Ty, /*isSigned=*/true);
10156     } else {
10157       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10158     }
10159 
10160     // Emit the number of elements in the offloading arrays.
10161     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10162 
10163     llvm::Value *OffloadingArgs[] = {
10164         DeviceID,         PointerNum,    BasePointersArrayArg,
10165         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10166     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10167                             CGM.getModule(), OMPRTL___tgt_target_data_end),
10168                         OffloadingArgs);
10169   };
10170 
10171   // If we need device pointer privatization, we need to emit the body of the
10172   // region with no privatization in the 'else' branch of the conditional.
10173   // Otherwise, we don't have to do anything.
10174   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10175                                                          PrePostActionTy &) {
10176     if (!Info.CaptureDeviceAddrMap.empty()) {
10177       CodeGen.setAction(NoPrivAction);
10178       CodeGen(CGF);
10179     }
10180   };
10181 
10182   // We don't have to do anything to close the region if the if clause evaluates
10183   // to false.
10184   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10185 
10186   if (IfCond) {
10187     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10188   } else {
10189     RegionCodeGenTy RCG(BeginThenGen);
10190     RCG(CGF);
10191   }
10192 
10193   // If we don't require privatization of device pointers, we emit the body in
10194   // between the runtime calls. This avoids duplicating the body code.
10195   if (Info.CaptureDeviceAddrMap.empty()) {
10196     CodeGen.setAction(NoPrivAction);
10197     CodeGen(CGF);
10198   }
10199 
10200   if (IfCond) {
10201     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10202   } else {
10203     RegionCodeGenTy RCG(EndThenGen);
10204     RCG(CGF);
10205   }
10206 }
10207 
10208 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10209     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10210     const Expr *Device) {
10211   if (!CGF.HaveInsertPoint())
10212     return;
10213 
10214   assert((isa<OMPTargetEnterDataDirective>(D) ||
10215           isa<OMPTargetExitDataDirective>(D) ||
10216           isa<OMPTargetUpdateDirective>(D)) &&
10217          "Expecting either target enter, exit data, or update directives.");
10218 
10219   CodeGenFunction::OMPTargetDataInfo InputInfo;
10220   llvm::Value *MapTypesArray = nullptr;
10221   // Generate the code for the opening of the data environment.
10222   auto &&ThenGen = [this, &D, Device, &InputInfo,
10223                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10224     // Emit device ID if any.
10225     llvm::Value *DeviceID = nullptr;
10226     if (Device) {
10227       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10228                                            CGF.Int64Ty, /*isSigned=*/true);
10229     } else {
10230       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10231     }
10232 
10233     // Emit the number of elements in the offloading arrays.
10234     llvm::Constant *PointerNum =
10235         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10236 
10237     llvm::Value *OffloadingArgs[] = {DeviceID,
10238                                      PointerNum,
10239                                      InputInfo.BasePointersArray.getPointer(),
10240                                      InputInfo.PointersArray.getPointer(),
10241                                      InputInfo.SizesArray.getPointer(),
10242                                      MapTypesArray};
10243 
10244     // Select the right runtime function call for each expected standalone
10245     // directive.
10246     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10247     RuntimeFunction RTLFn;
10248     switch (D.getDirectiveKind()) {
10249     case OMPD_target_enter_data:
10250       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
10251                         : OMPRTL___tgt_target_data_begin;
10252       break;
10253     case OMPD_target_exit_data:
10254       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
10255                         : OMPRTL___tgt_target_data_end;
10256       break;
10257     case OMPD_target_update:
10258       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
10259                         : OMPRTL___tgt_target_data_update;
10260       break;
10261     case OMPD_parallel:
10262     case OMPD_for:
10263     case OMPD_parallel_for:
10264     case OMPD_parallel_master:
10265     case OMPD_parallel_sections:
10266     case OMPD_for_simd:
10267     case OMPD_parallel_for_simd:
10268     case OMPD_cancel:
10269     case OMPD_cancellation_point:
10270     case OMPD_ordered:
10271     case OMPD_threadprivate:
10272     case OMPD_allocate:
10273     case OMPD_task:
10274     case OMPD_simd:
10275     case OMPD_sections:
10276     case OMPD_section:
10277     case OMPD_single:
10278     case OMPD_master:
10279     case OMPD_critical:
10280     case OMPD_taskyield:
10281     case OMPD_barrier:
10282     case OMPD_taskwait:
10283     case OMPD_taskgroup:
10284     case OMPD_atomic:
10285     case OMPD_flush:
10286     case OMPD_depobj:
10287     case OMPD_scan:
10288     case OMPD_teams:
10289     case OMPD_target_data:
10290     case OMPD_distribute:
10291     case OMPD_distribute_simd:
10292     case OMPD_distribute_parallel_for:
10293     case OMPD_distribute_parallel_for_simd:
10294     case OMPD_teams_distribute:
10295     case OMPD_teams_distribute_simd:
10296     case OMPD_teams_distribute_parallel_for:
10297     case OMPD_teams_distribute_parallel_for_simd:
10298     case OMPD_declare_simd:
10299     case OMPD_declare_variant:
10300     case OMPD_begin_declare_variant:
10301     case OMPD_end_declare_variant:
10302     case OMPD_declare_target:
10303     case OMPD_end_declare_target:
10304     case OMPD_declare_reduction:
10305     case OMPD_declare_mapper:
10306     case OMPD_taskloop:
10307     case OMPD_taskloop_simd:
10308     case OMPD_master_taskloop:
10309     case OMPD_master_taskloop_simd:
10310     case OMPD_parallel_master_taskloop:
10311     case OMPD_parallel_master_taskloop_simd:
10312     case OMPD_target:
10313     case OMPD_target_simd:
10314     case OMPD_target_teams_distribute:
10315     case OMPD_target_teams_distribute_simd:
10316     case OMPD_target_teams_distribute_parallel_for:
10317     case OMPD_target_teams_distribute_parallel_for_simd:
10318     case OMPD_target_teams:
10319     case OMPD_target_parallel:
10320     case OMPD_target_parallel_for:
10321     case OMPD_target_parallel_for_simd:
10322     case OMPD_requires:
10323     case OMPD_unknown:
10324     default:
10325       llvm_unreachable("Unexpected standalone target data directive.");
10326       break;
10327     }
10328     CGF.EmitRuntimeCall(
10329         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10330         OffloadingArgs);
10331   };
10332 
10333   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10334                              CodeGenFunction &CGF, PrePostActionTy &) {
10335     // Fill up the arrays with all the mapped variables.
10336     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10337     MappableExprsHandler::MapValuesArrayTy Pointers;
10338     MappableExprsHandler::MapValuesArrayTy Sizes;
10339     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10340 
10341     // Get map clause information.
10342     MappableExprsHandler MEHandler(D, CGF);
10343     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10344 
10345     TargetDataInfo Info;
10346     // Fill up the arrays and create the arguments.
10347     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10348     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10349                                  Info.PointersArray, Info.SizesArray,
10350                                  Info.MapTypesArray, Info);
10351     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10352     InputInfo.BasePointersArray =
10353         Address(Info.BasePointersArray, CGM.getPointerAlign());
10354     InputInfo.PointersArray =
10355         Address(Info.PointersArray, CGM.getPointerAlign());
10356     InputInfo.SizesArray =
10357         Address(Info.SizesArray, CGM.getPointerAlign());
10358     MapTypesArray = Info.MapTypesArray;
10359     if (D.hasClausesOfKind<OMPDependClause>())
10360       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10361     else
10362       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10363   };
10364 
10365   if (IfCond) {
10366     emitIfClause(CGF, IfCond, TargetThenGen,
10367                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10368   } else {
10369     RegionCodeGenTy ThenRCG(TargetThenGen);
10370     ThenRCG(CGF);
10371   }
10372 }
10373 
10374 namespace {
10375   /// Kind of parameter in a function with 'declare simd' directive.
10376   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10377   /// Attribute set of the parameter.
10378   struct ParamAttrTy {
10379     ParamKindTy Kind = Vector;
10380     llvm::APSInt StrideOrArg;
10381     llvm::APSInt Alignment;
10382   };
10383 } // namespace
10384 
10385 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10386                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10387   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10388   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10389   // of that clause. The VLEN value must be power of 2.
10390   // In other case the notion of the function`s "characteristic data type" (CDT)
10391   // is used to compute the vector length.
10392   // CDT is defined in the following order:
10393   //   a) For non-void function, the CDT is the return type.
10394   //   b) If the function has any non-uniform, non-linear parameters, then the
10395   //   CDT is the type of the first such parameter.
10396   //   c) If the CDT determined by a) or b) above is struct, union, or class
10397   //   type which is pass-by-value (except for the type that maps to the
10398   //   built-in complex data type), the characteristic data type is int.
10399   //   d) If none of the above three cases is applicable, the CDT is int.
10400   // The VLEN is then determined based on the CDT and the size of vector
10401   // register of that ISA for which current vector version is generated. The
10402   // VLEN is computed using the formula below:
10403   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10404   // where vector register size specified in section 3.2.1 Registers and the
10405   // Stack Frame of original AMD64 ABI document.
10406   QualType RetType = FD->getReturnType();
10407   if (RetType.isNull())
10408     return 0;
10409   ASTContext &C = FD->getASTContext();
10410   QualType CDT;
10411   if (!RetType.isNull() && !RetType->isVoidType()) {
10412     CDT = RetType;
10413   } else {
10414     unsigned Offset = 0;
10415     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10416       if (ParamAttrs[Offset].Kind == Vector)
10417         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10418       ++Offset;
10419     }
10420     if (CDT.isNull()) {
10421       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10422         if (ParamAttrs[I + Offset].Kind == Vector) {
10423           CDT = FD->getParamDecl(I)->getType();
10424           break;
10425         }
10426       }
10427     }
10428   }
10429   if (CDT.isNull())
10430     CDT = C.IntTy;
10431   CDT = CDT->getCanonicalTypeUnqualified();
10432   if (CDT->isRecordType() || CDT->isUnionType())
10433     CDT = C.IntTy;
10434   return C.getTypeSize(CDT);
10435 }
10436 
10437 static void
10438 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10439                            const llvm::APSInt &VLENVal,
10440                            ArrayRef<ParamAttrTy> ParamAttrs,
10441                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10442   struct ISADataTy {
10443     char ISA;
10444     unsigned VecRegSize;
10445   };
10446   ISADataTy ISAData[] = {
10447       {
10448           'b', 128
10449       }, // SSE
10450       {
10451           'c', 256
10452       }, // AVX
10453       {
10454           'd', 256
10455       }, // AVX2
10456       {
10457           'e', 512
10458       }, // AVX512
10459   };
10460   llvm::SmallVector<char, 2> Masked;
10461   switch (State) {
10462   case OMPDeclareSimdDeclAttr::BS_Undefined:
10463     Masked.push_back('N');
10464     Masked.push_back('M');
10465     break;
10466   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10467     Masked.push_back('N');
10468     break;
10469   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10470     Masked.push_back('M');
10471     break;
10472   }
10473   for (char Mask : Masked) {
10474     for (const ISADataTy &Data : ISAData) {
10475       SmallString<256> Buffer;
10476       llvm::raw_svector_ostream Out(Buffer);
10477       Out << "_ZGV" << Data.ISA << Mask;
10478       if (!VLENVal) {
10479         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10480         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10481         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10482       } else {
10483         Out << VLENVal;
10484       }
10485       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10486         switch (ParamAttr.Kind){
10487         case LinearWithVarStride:
10488           Out << 's' << ParamAttr.StrideOrArg;
10489           break;
10490         case Linear:
10491           Out << 'l';
10492           if (ParamAttr.StrideOrArg != 1)
10493             Out << ParamAttr.StrideOrArg;
10494           break;
10495         case Uniform:
10496           Out << 'u';
10497           break;
10498         case Vector:
10499           Out << 'v';
10500           break;
10501         }
10502         if (!!ParamAttr.Alignment)
10503           Out << 'a' << ParamAttr.Alignment;
10504       }
10505       Out << '_' << Fn->getName();
10506       Fn->addFnAttr(Out.str());
10507     }
10508   }
10509 }
10510 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10516 
10517 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10518 ///
10519 /// TODO: Need to implement the behavior for reference marked with a
10520 /// var or no linear modifiers (1.b in the section). For this, we
10521 /// need to extend ParamKindTy to support the linear modifiers.
10522 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10523   QT = QT.getCanonicalType();
10524 
10525   if (QT->isVoidType())
10526     return false;
10527 
10528   if (Kind == ParamKindTy::Uniform)
10529     return false;
10530 
10531   if (Kind == ParamKindTy::Linear)
10532     return false;
10533 
10534   // TODO: Handle linear references with modifiers
10535 
10536   if (Kind == ParamKindTy::LinearWithVarStride)
10537     return false;
10538 
10539   return true;
10540 }
10541 
10542 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10543 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10544   QT = QT.getCanonicalType();
10545   unsigned Size = C.getTypeSize(QT);
10546 
10547   // Only scalars and complex within 16 bytes wide set PVB to true.
10548   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10549     return false;
10550 
10551   if (QT->isFloatingType())
10552     return true;
10553 
10554   if (QT->isIntegerType())
10555     return true;
10556 
10557   if (QT->isPointerType())
10558     return true;
10559 
10560   // TODO: Add support for complex types (section 3.1.2, item 2).
10561 
10562   return false;
10563 }
10564 
10565 /// Computes the lane size (LS) of a return type or of an input parameter,
10566 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10567 /// TODO: Add support for references, section 3.2.1, item 1.
10568 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10569   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10570     QualType PTy = QT.getCanonicalType()->getPointeeType();
10571     if (getAArch64PBV(PTy, C))
10572       return C.getTypeSize(PTy);
10573   }
10574   if (getAArch64PBV(QT, C))
10575     return C.getTypeSize(QT);
10576 
10577   return C.getTypeSize(C.getUIntPtrType());
10578 }
10579 
10580 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10581 // signature of the scalar function, as defined in 3.2.2 of the
10582 // AAVFABI.
10583 static std::tuple<unsigned, unsigned, bool>
10584 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10585   QualType RetType = FD->getReturnType().getCanonicalType();
10586 
10587   ASTContext &C = FD->getASTContext();
10588 
10589   bool OutputBecomesInput = false;
10590 
10591   llvm::SmallVector<unsigned, 8> Sizes;
10592   if (!RetType->isVoidType()) {
10593     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10594     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10595       OutputBecomesInput = true;
10596   }
10597   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10598     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10599     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10600   }
10601 
10602   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10603   // The LS of a function parameter / return value can only be a power
10604   // of 2, starting from 8 bits, up to 128.
10605   assert(std::all_of(Sizes.begin(), Sizes.end(),
10606                      [](unsigned Size) {
10607                        return Size == 8 || Size == 16 || Size == 32 ||
10608                               Size == 64 || Size == 128;
10609                      }) &&
10610          "Invalid size");
10611 
10612   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10613                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10614                          OutputBecomesInput);
10615 }
10616 
10617 /// Mangle the parameter part of the vector function name according to
10618 /// their OpenMP classification. The mangling function is defined in
10619 /// section 3.5 of the AAVFABI.
10620 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10621   SmallString<256> Buffer;
10622   llvm::raw_svector_ostream Out(Buffer);
10623   for (const auto &ParamAttr : ParamAttrs) {
10624     switch (ParamAttr.Kind) {
10625     case LinearWithVarStride:
10626       Out << "ls" << ParamAttr.StrideOrArg;
10627       break;
10628     case Linear:
10629       Out << 'l';
10630       // Don't print the step value if it is not present or if it is
10631       // equal to 1.
10632       if (ParamAttr.StrideOrArg != 1)
10633         Out << ParamAttr.StrideOrArg;
10634       break;
10635     case Uniform:
10636       Out << 'u';
10637       break;
10638     case Vector:
10639       Out << 'v';
10640       break;
10641     }
10642 
10643     if (!!ParamAttr.Alignment)
10644       Out << 'a' << ParamAttr.Alignment;
10645   }
10646 
10647   return std::string(Out.str());
10648 }
10649 
10650 // Function used to add the attribute. The parameter `VLEN` is
10651 // templated to allow the use of "x" when targeting scalable functions
10652 // for SVE.
10653 template <typename T>
10654 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10655                                  char ISA, StringRef ParSeq,
10656                                  StringRef MangledName, bool OutputBecomesInput,
10657                                  llvm::Function *Fn) {
10658   SmallString<256> Buffer;
10659   llvm::raw_svector_ostream Out(Buffer);
10660   Out << Prefix << ISA << LMask << VLEN;
10661   if (OutputBecomesInput)
10662     Out << "v";
10663   Out << ParSeq << "_" << MangledName;
10664   Fn->addFnAttr(Out.str());
10665 }
10666 
10667 // Helper function to generate the Advanced SIMD names depending on
10668 // the value of the NDS when simdlen is not present.
10669 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10670                                       StringRef Prefix, char ISA,
10671                                       StringRef ParSeq, StringRef MangledName,
10672                                       bool OutputBecomesInput,
10673                                       llvm::Function *Fn) {
10674   switch (NDS) {
10675   case 8:
10676     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10677                          OutputBecomesInput, Fn);
10678     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10679                          OutputBecomesInput, Fn);
10680     break;
10681   case 16:
10682     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10683                          OutputBecomesInput, Fn);
10684     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10685                          OutputBecomesInput, Fn);
10686     break;
10687   case 32:
10688     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10689                          OutputBecomesInput, Fn);
10690     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10691                          OutputBecomesInput, Fn);
10692     break;
10693   case 64:
10694   case 128:
10695     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10696                          OutputBecomesInput, Fn);
10697     break;
10698   default:
10699     llvm_unreachable("Scalar type is too wide.");
10700   }
10701 }
10702 
10703 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10704 static void emitAArch64DeclareSimdFunction(
10705     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10706     ArrayRef<ParamAttrTy> ParamAttrs,
10707     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10708     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10709 
10710   // Get basic data for building the vector signature.
10711   const auto Data = getNDSWDS(FD, ParamAttrs);
10712   const unsigned NDS = std::get<0>(Data);
10713   const unsigned WDS = std::get<1>(Data);
10714   const bool OutputBecomesInput = std::get<2>(Data);
10715 
10716   // Check the values provided via `simdlen` by the user.
10717   // 1. A `simdlen(1)` doesn't produce vector signatures,
10718   if (UserVLEN == 1) {
10719     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10720         DiagnosticsEngine::Warning,
10721         "The clause simdlen(1) has no effect when targeting aarch64.");
10722     CGM.getDiags().Report(SLoc, DiagID);
10723     return;
10724   }
10725 
10726   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10727   // Advanced SIMD output.
10728   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10729     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10730         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10731                                     "power of 2 when targeting Advanced SIMD.");
10732     CGM.getDiags().Report(SLoc, DiagID);
10733     return;
10734   }
10735 
10736   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10737   // limits.
10738   if (ISA == 's' && UserVLEN != 0) {
10739     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10740       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10741           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10742                                       "lanes in the architectural constraints "
10743                                       "for SVE (min is 128-bit, max is "
10744                                       "2048-bit, by steps of 128-bit)");
10745       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10746       return;
10747     }
10748   }
10749 
10750   // Sort out parameter sequence.
10751   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10752   StringRef Prefix = "_ZGV";
10753   // Generate simdlen from user input (if any).
10754   if (UserVLEN) {
10755     if (ISA == 's') {
10756       // SVE generates only a masked function.
10757       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10758                            OutputBecomesInput, Fn);
10759     } else {
10760       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10761       // Advanced SIMD generates one or two functions, depending on
10762       // the `[not]inbranch` clause.
10763       switch (State) {
10764       case OMPDeclareSimdDeclAttr::BS_Undefined:
10765         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10766                              OutputBecomesInput, Fn);
10767         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10768                              OutputBecomesInput, Fn);
10769         break;
10770       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10771         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10772                              OutputBecomesInput, Fn);
10773         break;
10774       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10775         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10776                              OutputBecomesInput, Fn);
10777         break;
10778       }
10779     }
10780   } else {
10781     // If no user simdlen is provided, follow the AAVFABI rules for
10782     // generating the vector length.
10783     if (ISA == 's') {
10784       // SVE, section 3.4.1, item 1.
10785       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10786                            OutputBecomesInput, Fn);
10787     } else {
10788       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10789       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10790       // two vector names depending on the use of the clause
10791       // `[not]inbranch`.
10792       switch (State) {
10793       case OMPDeclareSimdDeclAttr::BS_Undefined:
10794         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10795                                   OutputBecomesInput, Fn);
10796         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10797                                   OutputBecomesInput, Fn);
10798         break;
10799       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10800         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10801                                   OutputBecomesInput, Fn);
10802         break;
10803       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10804         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10805                                   OutputBecomesInput, Fn);
10806         break;
10807       }
10808     }
10809   }
10810 }
10811 
10812 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10813                                               llvm::Function *Fn) {
10814   ASTContext &C = CGM.getContext();
10815   FD = FD->getMostRecentDecl();
10816   // Map params to their positions in function decl.
10817   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10818   if (isa<CXXMethodDecl>(FD))
10819     ParamPositions.try_emplace(FD, 0);
10820   unsigned ParamPos = ParamPositions.size();
10821   for (const ParmVarDecl *P : FD->parameters()) {
10822     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10823     ++ParamPos;
10824   }
10825   while (FD) {
10826     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10827       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10828       // Mark uniform parameters.
10829       for (const Expr *E : Attr->uniforms()) {
10830         E = E->IgnoreParenImpCasts();
10831         unsigned Pos;
10832         if (isa<CXXThisExpr>(E)) {
10833           Pos = ParamPositions[FD];
10834         } else {
10835           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10836                                 ->getCanonicalDecl();
10837           Pos = ParamPositions[PVD];
10838         }
10839         ParamAttrs[Pos].Kind = Uniform;
10840       }
10841       // Get alignment info.
10842       auto NI = Attr->alignments_begin();
10843       for (const Expr *E : Attr->aligneds()) {
10844         E = E->IgnoreParenImpCasts();
10845         unsigned Pos;
10846         QualType ParmTy;
10847         if (isa<CXXThisExpr>(E)) {
10848           Pos = ParamPositions[FD];
10849           ParmTy = E->getType();
10850         } else {
10851           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10852                                 ->getCanonicalDecl();
10853           Pos = ParamPositions[PVD];
10854           ParmTy = PVD->getType();
10855         }
10856         ParamAttrs[Pos].Alignment =
10857             (*NI)
10858                 ? (*NI)->EvaluateKnownConstInt(C)
10859                 : llvm::APSInt::getUnsigned(
10860                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10861                           .getQuantity());
10862         ++NI;
10863       }
10864       // Mark linear parameters.
10865       auto SI = Attr->steps_begin();
10866       auto MI = Attr->modifiers_begin();
10867       for (const Expr *E : Attr->linears()) {
10868         E = E->IgnoreParenImpCasts();
10869         unsigned Pos;
10870         // Rescaling factor needed to compute the linear parameter
10871         // value in the mangled name.
10872         unsigned PtrRescalingFactor = 1;
10873         if (isa<CXXThisExpr>(E)) {
10874           Pos = ParamPositions[FD];
10875         } else {
10876           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10877                                 ->getCanonicalDecl();
10878           Pos = ParamPositions[PVD];
10879           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10880             PtrRescalingFactor = CGM.getContext()
10881                                      .getTypeSizeInChars(P->getPointeeType())
10882                                      .getQuantity();
10883         }
10884         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10885         ParamAttr.Kind = Linear;
10886         // Assuming a stride of 1, for `linear` without modifiers.
10887         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10888         if (*SI) {
10889           Expr::EvalResult Result;
10890           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10891             if (const auto *DRE =
10892                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10893               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10894                 ParamAttr.Kind = LinearWithVarStride;
10895                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10896                     ParamPositions[StridePVD->getCanonicalDecl()]);
10897               }
10898             }
10899           } else {
10900             ParamAttr.StrideOrArg = Result.Val.getInt();
10901           }
10902         }
10903         // If we are using a linear clause on a pointer, we need to
10904         // rescale the value of linear_step with the byte size of the
10905         // pointee type.
10906         if (Linear == ParamAttr.Kind)
10907           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10908         ++SI;
10909         ++MI;
10910       }
10911       llvm::APSInt VLENVal;
10912       SourceLocation ExprLoc;
10913       const Expr *VLENExpr = Attr->getSimdlen();
10914       if (VLENExpr) {
10915         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10916         ExprLoc = VLENExpr->getExprLoc();
10917       }
10918       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10919       if (CGM.getTriple().isX86()) {
10920         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10921       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10922         unsigned VLEN = VLENVal.getExtValue();
10923         StringRef MangledName = Fn->getName();
10924         if (CGM.getTarget().hasFeature("sve"))
10925           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10926                                          MangledName, 's', 128, Fn, ExprLoc);
10927         if (CGM.getTarget().hasFeature("neon"))
10928           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10929                                          MangledName, 'n', 128, Fn, ExprLoc);
10930       }
10931     }
10932     FD = FD->getPreviousDecl();
10933   }
10934 }
10935 
10936 namespace {
10937 /// Cleanup action for doacross support.
10938 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10939 public:
10940   static const int DoacrossFinArgs = 2;
10941 
10942 private:
10943   llvm::FunctionCallee RTLFn;
10944   llvm::Value *Args[DoacrossFinArgs];
10945 
10946 public:
10947   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10948                     ArrayRef<llvm::Value *> CallArgs)
10949       : RTLFn(RTLFn) {
10950     assert(CallArgs.size() == DoacrossFinArgs);
10951     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10952   }
10953   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10954     if (!CGF.HaveInsertPoint())
10955       return;
10956     CGF.EmitRuntimeCall(RTLFn, Args);
10957   }
10958 };
10959 } // namespace
10960 
10961 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10962                                        const OMPLoopDirective &D,
10963                                        ArrayRef<Expr *> NumIterations) {
10964   if (!CGF.HaveInsertPoint())
10965     return;
10966 
10967   ASTContext &C = CGM.getContext();
10968   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10969   RecordDecl *RD;
10970   if (KmpDimTy.isNull()) {
10971     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10972     //  kmp_int64 lo; // lower
10973     //  kmp_int64 up; // upper
10974     //  kmp_int64 st; // stride
10975     // };
10976     RD = C.buildImplicitRecord("kmp_dim");
10977     RD->startDefinition();
10978     addFieldToRecordDecl(C, RD, Int64Ty);
10979     addFieldToRecordDecl(C, RD, Int64Ty);
10980     addFieldToRecordDecl(C, RD, Int64Ty);
10981     RD->completeDefinition();
10982     KmpDimTy = C.getRecordType(RD);
10983   } else {
10984     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10985   }
10986   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10987   QualType ArrayTy =
10988       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
10989 
10990   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10991   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10992   enum { LowerFD = 0, UpperFD, StrideFD };
10993   // Fill dims with data.
10994   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10995     LValue DimsLVal = CGF.MakeAddrLValue(
10996         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10997     // dims.upper = num_iterations;
10998     LValue UpperLVal = CGF.EmitLValueForField(
10999         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11000     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11001         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11002         Int64Ty, NumIterations[I]->getExprLoc());
11003     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11004     // dims.stride = 1;
11005     LValue StrideLVal = CGF.EmitLValueForField(
11006         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11007     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11008                           StrideLVal);
11009   }
11010 
11011   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11012   // kmp_int32 num_dims, struct kmp_dim * dims);
11013   llvm::Value *Args[] = {
11014       emitUpdateLocation(CGF, D.getBeginLoc()),
11015       getThreadID(CGF, D.getBeginLoc()),
11016       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11017       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11018           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11019           CGM.VoidPtrTy)};
11020 
11021   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11022       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11023   CGF.EmitRuntimeCall(RTLFn, Args);
11024   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11025       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11026   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11027       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11028   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11029                                              llvm::makeArrayRef(FiniArgs));
11030 }
11031 
11032 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11033                                           const OMPDependClause *C) {
11034   QualType Int64Ty =
11035       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11036   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11037   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11038       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11039   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11040   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11041     const Expr *CounterVal = C->getLoopData(I);
11042     assert(CounterVal);
11043     llvm::Value *CntVal = CGF.EmitScalarConversion(
11044         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11045         CounterVal->getExprLoc());
11046     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11047                           /*Volatile=*/false, Int64Ty);
11048   }
11049   llvm::Value *Args[] = {
11050       emitUpdateLocation(CGF, C->getBeginLoc()),
11051       getThreadID(CGF, C->getBeginLoc()),
11052       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11053   llvm::FunctionCallee RTLFn;
11054   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11055     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11056                                                   OMPRTL___kmpc_doacross_post);
11057   } else {
11058     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11059     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11060                                                   OMPRTL___kmpc_doacross_wait);
11061   }
11062   CGF.EmitRuntimeCall(RTLFn, Args);
11063 }
11064 
11065 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11066                                llvm::FunctionCallee Callee,
11067                                ArrayRef<llvm::Value *> Args) const {
11068   assert(Loc.isValid() && "Outlined function call location must be valid.");
11069   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11070 
11071   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11072     if (Fn->doesNotThrow()) {
11073       CGF.EmitNounwindRuntimeCall(Fn, Args);
11074       return;
11075     }
11076   }
11077   CGF.EmitRuntimeCall(Callee, Args);
11078 }
11079 
/// Emit a call to the outlined function \p OutlinedFn with \p Args. This
/// implementation simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11085 
11086 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11087   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11088     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11089       HasEmittedDeclareTargetRegion = true;
11090 }
11091 
/// Return the address of the local variable \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11097 
11098 namespace {
11099 /// Cleanup action for allocate support.
11100 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11101 public:
11102   static const int CleanupArgs = 3;
11103 
11104 private:
11105   llvm::FunctionCallee RTLFn;
11106   llvm::Value *Args[CleanupArgs];
11107 
11108 public:
11109   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11110                        ArrayRef<llvm::Value *> CallArgs)
11111       : RTLFn(RTLFn) {
11112     assert(CallArgs.size() == CleanupArgs &&
11113            "Size of arguments does not match.");
11114     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11115   }
11116   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11117     if (!CGF.HaveInsertPoint())
11118       return;
11119     CGF.EmitRuntimeCall(RTLFn, Args);
11120   }
11121 };
11122 } // namespace
11123 
11124 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11125                                                    const VarDecl *VD) {
11126   if (!VD)
11127     return Address::invalid();
11128   const VarDecl *CVD = VD->getCanonicalDecl();
11129   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11130     return Address::invalid();
11131   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11132   // Use the default allocation.
11133   if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
11134        AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
11135       !AA->getAllocator())
11136     return Address::invalid();
11137   llvm::Value *Size;
11138   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11139   if (CVD->getType()->isVariablyModifiedType()) {
11140     Size = CGF.getTypeSize(CVD->getType());
11141     // Align the size: ((size + align - 1) / align) * align
11142     Size = CGF.Builder.CreateNUWAdd(
11143         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11144     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11145     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11146   } else {
11147     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11148     Size = CGM.getSize(Sz.alignTo(Align));
11149   }
11150   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11151   assert(AA->getAllocator() &&
11152          "Expected allocator expression for non-default allocator.");
11153   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11154   // According to the standard, the original allocator type is a enum (integer).
11155   // Convert to pointer type, if required.
11156   if (Allocator->getType()->isIntegerTy())
11157     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11158   else if (Allocator->getType()->isPointerTy())
11159     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11160                                                                 CGM.VoidPtrTy);
11161   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11162 
11163   llvm::Value *Addr =
11164       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11165                               CGM.getModule(), OMPRTL___kmpc_alloc),
11166                           Args, getName({CVD->getName(), ".void.addr"}));
11167   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11168                                                               Allocator};
11169   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11170       CGM.getModule(), OMPRTL___kmpc_free);
11171 
11172   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11173                                                 llvm::makeArrayRef(FiniArgs));
11174   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11175       Addr,
11176       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11177       getName({CVD->getName(), ".addr"}));
11178   return Address(Addr, Align);
11179 }
11180 
11181 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11182     CodeGenModule &CGM, const OMPLoopDirective &S)
11183     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11184   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11185   if (!NeedToPush)
11186     return;
11187   NontemporalDeclsSet &DS =
11188       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11189   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11190     for (const Stmt *Ref : C->private_refs()) {
11191       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11192       const ValueDecl *VD;
11193       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11194         VD = DRE->getDecl();
11195       } else {
11196         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11197         assert((ME->isImplicitCXXThis() ||
11198                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11199                "Expected member of current class.");
11200         VD = ME->getMemberDecl();
11201       }
11202       DS.insert(VD);
11203     }
11204   }
11205 }
11206 
11207 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11208   if (!NeedToPush)
11209     return;
11210   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11211 }
11212 
11213 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11214   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11215 
11216   return llvm::any_of(
11217       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11218       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11219 }
11220 
11221 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11222     const OMPExecutableDirective &S,
11223     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11224     const {
11225   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11226   // Vars in target/task regions must be excluded completely.
11227   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11228       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11229     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11230     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11231     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11232     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11233       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11234         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11235     }
11236   }
11237   // Exclude vars in private clauses.
11238   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11239     for (const Expr *Ref : C->varlists()) {
11240       if (!Ref->getType()->isScalarType())
11241         continue;
11242       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11243       if (!DRE)
11244         continue;
11245       NeedToCheckForLPCs.insert(DRE->getDecl());
11246     }
11247   }
11248   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11249     for (const Expr *Ref : C->varlists()) {
11250       if (!Ref->getType()->isScalarType())
11251         continue;
11252       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11253       if (!DRE)
11254         continue;
11255       NeedToCheckForLPCs.insert(DRE->getDecl());
11256     }
11257   }
11258   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11259     for (const Expr *Ref : C->varlists()) {
11260       if (!Ref->getType()->isScalarType())
11261         continue;
11262       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11263       if (!DRE)
11264         continue;
11265       NeedToCheckForLPCs.insert(DRE->getDecl());
11266     }
11267   }
11268   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11269     for (const Expr *Ref : C->varlists()) {
11270       if (!Ref->getType()->isScalarType())
11271         continue;
11272       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11273       if (!DRE)
11274         continue;
11275       NeedToCheckForLPCs.insert(DRE->getDecl());
11276     }
11277   }
11278   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11279     for (const Expr *Ref : C->varlists()) {
11280       if (!Ref->getType()->isScalarType())
11281         continue;
11282       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11283       if (!DRE)
11284         continue;
11285       NeedToCheckForLPCs.insert(DRE->getDecl());
11286     }
11287   }
11288   for (const Decl *VD : NeedToCheckForLPCs) {
11289     for (const LastprivateConditionalData &Data :
11290          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11291       if (Data.DeclToUniqueName.count(VD) > 0) {
11292         if (!Data.Disabled)
11293           NeedToAddForLPCsAsDisabled.insert(VD);
11294         break;
11295       }
11296     }
11297   }
11298 }
11299 
11300 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11301     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11302     : CGM(CGF.CGM),
11303       Action((CGM.getLangOpts().OpenMP >= 50 &&
11304               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11305                            [](const OMPLastprivateClause *C) {
11306                              return C->getKind() ==
11307                                     OMPC_LASTPRIVATE_conditional;
11308                            }))
11309                  ? ActionToDo::PushAsLastprivateConditional
11310                  : ActionToDo::DoNotPush) {
11311   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11312   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11313     return;
11314   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11315          "Expected a push action.");
11316   LastprivateConditionalData &Data =
11317       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11318   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11319     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11320       continue;
11321 
11322     for (const Expr *Ref : C->varlists()) {
11323       Data.DeclToUniqueName.insert(std::make_pair(
11324           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11325           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11326     }
11327   }
11328   Data.IVLVal = IVLVal;
11329   Data.Fn = CGF.CurFn;
11330 }
11331 
11332 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11333     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11334     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11335   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11336   if (CGM.getLangOpts().OpenMP < 50)
11337     return;
11338   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11339   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11340   if (!NeedToAddForLPCsAsDisabled.empty()) {
11341     Action = ActionToDo::DisableLastprivateConditional;
11342     LastprivateConditionalData &Data =
11343         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11344     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11345       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11346     Data.Fn = CGF.CurFn;
11347     Data.Disabled = true;
11348   }
11349 }
11350 
/// Named factory: forwards \p CGF and \p S to the two-argument
/// LastprivateConditionalRAII constructor and returns the resulting object by
/// value.
11351 CGOpenMPRuntime::LastprivateConditionalRAII
11352 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11353     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11354   return LastprivateConditionalRAII(CGF, S);
11355 }
11356 
11357 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11358   if (CGM.getLangOpts().OpenMP < 50)
11359     return;
11360   if (Action == ActionToDo::DisableLastprivateConditional) {
11361     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11362            "Expected list of disabled private vars.");
11363     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11364   }
11365   if (Action == ActionToDo::PushAsLastprivateConditional) {
11366     assert(
11367         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11368         "Expected list of lastprivate conditional vars.");
11369     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11370   }
11371 }
11372 
11373 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11374                                                         const VarDecl *VD) {
11375   ASTContext &C = CGM.getContext();
11376   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11377   if (I == LastprivateConditionalToTypes.end())
11378     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11379   QualType NewType;
11380   const FieldDecl *VDField;
11381   const FieldDecl *FiredField;
11382   LValue BaseLVal;
11383   auto VI = I->getSecond().find(VD);
11384   if (VI == I->getSecond().end()) {
11385     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11386     RD->startDefinition();
11387     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11388     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11389     RD->completeDefinition();
11390     NewType = C.getRecordType(RD);
11391     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11392     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11393     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11394   } else {
11395     NewType = std::get<0>(VI->getSecond());
11396     VDField = std::get<1>(VI->getSecond());
11397     FiredField = std::get<2>(VI->getSecond());
11398     BaseLVal = std::get<3>(VI->getSecond());
11399   }
11400   LValue FiredLVal =
11401       CGF.EmitLValueForField(BaseLVal, FiredField);
11402   CGF.EmitStoreOfScalar(
11403       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11404       FiredLVal);
11405   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11406 }
11407 
11408 namespace {
11409 /// Checks if the lastprivate conditional variable is referenced in LHS.
11410 class LastprivateConditionalRefChecker final
11411     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11412   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11413   const Expr *FoundE = nullptr;
11414   const Decl *FoundD = nullptr;
11415   StringRef UniqueDeclName;
11416   LValue IVLVal;
11417   llvm::Function *FoundFn = nullptr;
11418   SourceLocation Loc;
11419 
11420 public:
11421   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11422     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11423          llvm::reverse(LPM)) {
11424       auto It = D.DeclToUniqueName.find(E->getDecl());
11425       if (It == D.DeclToUniqueName.end())
11426         continue;
11427       if (D.Disabled)
11428         return false;
11429       FoundE = E;
11430       FoundD = E->getDecl()->getCanonicalDecl();
11431       UniqueDeclName = It->second;
11432       IVLVal = D.IVLVal;
11433       FoundFn = D.Fn;
11434       break;
11435     }
11436     return FoundE == E;
11437   }
11438   bool VisitMemberExpr(const MemberExpr *E) {
11439     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11440       return false;
11441     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11442          llvm::reverse(LPM)) {
11443       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11444       if (It == D.DeclToUniqueName.end())
11445         continue;
11446       if (D.Disabled)
11447         return false;
11448       FoundE = E;
11449       FoundD = E->getMemberDecl()->getCanonicalDecl();
11450       UniqueDeclName = It->second;
11451       IVLVal = D.IVLVal;
11452       FoundFn = D.Fn;
11453       break;
11454     }
11455     return FoundE == E;
11456   }
11457   bool VisitStmt(const Stmt *S) {
11458     for (const Stmt *Child : S->children()) {
11459       if (!Child)
11460         continue;
11461       if (const auto *E = dyn_cast<Expr>(Child))
11462         if (!E->isGLValue())
11463           continue;
11464       if (Visit(Child))
11465         return true;
11466     }
11467     return false;
11468   }
11469   explicit LastprivateConditionalRefChecker(
11470       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11471       : LPM(LPM) {}
11472   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11473   getFoundData() const {
11474     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11475   }
11476 };
11477 } // namespace
11478 
11479 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11480                                                        LValue IVLVal,
11481                                                        StringRef UniqueDeclName,
11482                                                        LValue LVal,
11483                                                        SourceLocation Loc) {
11484   // Last updated loop counter for the lastprivate conditional var.
11485   // int<xx> last_iv = 0;
11486   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11487   llvm::Constant *LastIV =
11488       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
11489   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11490       IVLVal.getAlignment().getAsAlign());
11491   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11492 
11493   // Last value of the lastprivate conditional.
11494   // decltype(priv_a) last_a;
11495   llvm::Constant *Last = getOrCreateInternalVariable(
11496       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11497   cast<llvm::GlobalVariable>(Last)->setAlignment(
11498       LVal.getAlignment().getAsAlign());
11499   LValue LastLVal =
11500       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11501 
11502   // Global loop counter. Required to handle inner parallel-for regions.
11503   // iv
11504   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11505 
11506   // #pragma omp critical(a)
11507   // if (last_iv <= iv) {
11508   //   last_iv = iv;
11509   //   last_a = priv_a;
11510   // }
11511   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11512                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11513     Action.Enter(CGF);
11514     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11515     // (last_iv <= iv) ? Check if the variable is updated and store new
11516     // value in global var.
11517     llvm::Value *CmpRes;
11518     if (IVLVal.getType()->isSignedIntegerType()) {
11519       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11520     } else {
11521       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11522              "Loop iteration variable must be integer.");
11523       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11524     }
11525     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11526     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11527     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11528     // {
11529     CGF.EmitBlock(ThenBB);
11530 
11531     //   last_iv = iv;
11532     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11533 
11534     //   last_a = priv_a;
11535     switch (CGF.getEvaluationKind(LVal.getType())) {
11536     case TEK_Scalar: {
11537       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11538       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11539       break;
11540     }
11541     case TEK_Complex: {
11542       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11543       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11544       break;
11545     }
11546     case TEK_Aggregate:
11547       llvm_unreachable(
11548           "Aggregates are not supported in lastprivate conditional.");
11549     }
11550     // }
11551     CGF.EmitBranch(ExitBB);
11552     // There is no need to emit line number for unconditional branch.
11553     (void)ApplyDebugLocation::CreateEmpty(CGF);
11554     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11555   };
11556 
11557   if (CGM.getLangOpts().OpenMPSimd) {
11558     // Do not emit as a critical region as no parallel region could be emitted.
11559     RegionCodeGenTy ThenRCG(CodeGen);
11560     ThenRCG(CGF);
11561   } else {
11562     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11563   }
11564 }
11565 
11566 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11567                                                          const Expr *LHS) {
11568   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11569     return;
11570   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11571   if (!Checker.Visit(LHS))
11572     return;
11573   const Expr *FoundE;
11574   const Decl *FoundD;
11575   StringRef UniqueDeclName;
11576   LValue IVLVal;
11577   llvm::Function *FoundFn;
11578   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11579       Checker.getFoundData();
11580   if (FoundFn != CGF.CurFn) {
11581     // Special codegen for inner parallel regions.
11582     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11583     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11584     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11585            "Lastprivate conditional is not found in outer region.");
11586     QualType StructTy = std::get<0>(It->getSecond());
11587     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11588     LValue PrivLVal = CGF.EmitLValue(FoundE);
11589     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11590         PrivLVal.getAddress(CGF),
11591         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11592     LValue BaseLVal =
11593         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11594     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11595     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11596                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11597                         FiredLVal, llvm::AtomicOrdering::Unordered,
11598                         /*IsVolatile=*/true, /*isInit=*/false);
11599     return;
11600   }
11601 
11602   // Private address of the lastprivate conditional in the current context.
11603   // priv_a
11604   LValue LVal = CGF.EmitLValue(FoundE);
11605   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11606                                    FoundE->getExprLoc());
11607 }
11608 
11609 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11610     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11611     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11612   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11613     return;
11614   auto Range = llvm::reverse(LastprivateConditionalStack);
11615   auto It = llvm::find_if(
11616       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11617   if (It == Range.end() || It->Fn != CGF.CurFn)
11618     return;
11619   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11620   assert(LPCI != LastprivateConditionalToTypes.end() &&
11621          "Lastprivates must be registered already.");
11622   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11623   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11624   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11625   for (const auto &Pair : It->DeclToUniqueName) {
11626     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11627     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11628       continue;
11629     auto I = LPCI->getSecond().find(Pair.first);
11630     assert(I != LPCI->getSecond().end() &&
11631            "Lastprivate must be rehistered already.");
11632     // bool Cmp = priv_a.Fired != 0;
11633     LValue BaseLVal = std::get<3>(I->getSecond());
11634     LValue FiredLVal =
11635         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11636     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11637     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11638     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11639     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11640     // if (Cmp) {
11641     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11642     CGF.EmitBlock(ThenBB);
11643     Address Addr = CGF.GetAddrOfLocalVar(VD);
11644     LValue LVal;
11645     if (VD->getType()->isReferenceType())
11646       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11647                                            AlignmentSource::Decl);
11648     else
11649       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11650                                 AlignmentSource::Decl);
11651     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11652                                      D.getBeginLoc());
11653     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11654     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11655     // }
11656   }
11657 }
11658 
11659 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11660     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11661     SourceLocation Loc) {
11662   if (CGF.getLangOpts().OpenMP < 50)
11663     return;
11664   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11665   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11666          "Unknown lastprivate conditional variable.");
11667   StringRef UniqueName = It->second;
11668   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11669   // The variable was not updated in the region - exit.
11670   if (!GV)
11671     return;
11672   LValue LPLVal = CGF.MakeAddrLValue(
11673       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11674   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11675   CGF.EmitStoreOfScalar(Res, PrivLVal);
11676 }
11677 
/// emitParallelOutlinedFunction is not supported in SIMD-only mode: the body
/// is a lone llvm_unreachable, so control is expected never to reach it.
11678 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11679     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11680     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11681   llvm_unreachable("Not supported in SIMD-only mode");
11682 }
11683 
/// emitTeamsOutlinedFunction is not supported in SIMD-only mode: the body is
/// a lone llvm_unreachable, so control is expected never to reach it.
11684 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11685     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11686     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11687   llvm_unreachable("Not supported in SIMD-only mode");
11688 }
11689 
/// emitTaskOutlinedFunction is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it. In
/// particular \p NumberOfParts is never written.
11690 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11691     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11692     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11693     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11694     bool Tied, unsigned &NumberOfParts) {
11695   llvm_unreachable("Not supported in SIMD-only mode");
11696 }
11697 
/// emitParallelCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11698 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11699                                            SourceLocation Loc,
11700                                            llvm::Function *OutlinedFn,
11701                                            ArrayRef<llvm::Value *> CapturedVars,
11702                                            const Expr *IfCond) {
11703   llvm_unreachable("Not supported in SIMD-only mode");
11704 }
11705 
/// emitCriticalRegion is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11706 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11707     CodeGenFunction &CGF, StringRef CriticalName,
11708     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11709     const Expr *Hint) {
11710   llvm_unreachable("Not supported in SIMD-only mode");
11711 }
11712 
/// emitMasterRegion is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11713 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11714                                            const RegionCodeGenTy &MasterOpGen,
11715                                            SourceLocation Loc) {
11716   llvm_unreachable("Not supported in SIMD-only mode");
11717 }
11718 
/// emitTaskyieldCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11719 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11720                                             SourceLocation Loc) {
11721   llvm_unreachable("Not supported in SIMD-only mode");
11722 }
11723 
/// emitTaskgroupRegion is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11724 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11725     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11726     SourceLocation Loc) {
11727   llvm_unreachable("Not supported in SIMD-only mode");
11728 }
11729 
/// emitSingleRegion is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11730 void CGOpenMPSIMDRuntime::emitSingleRegion(
11731     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11732     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11733     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11734     ArrayRef<const Expr *> AssignmentOps) {
11735   llvm_unreachable("Not supported in SIMD-only mode");
11736 }
11737 
/// emitOrderedRegion is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11738 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11739                                             const RegionCodeGenTy &OrderedOpGen,
11740                                             SourceLocation Loc,
11741                                             bool IsThreads) {
11742   llvm_unreachable("Not supported in SIMD-only mode");
11743 }
11744 
/// emitBarrierCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11745 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11746                                           SourceLocation Loc,
11747                                           OpenMPDirectiveKind Kind,
11748                                           bool EmitChecks,
11749                                           bool ForceSimpleCall) {
11750   llvm_unreachable("Not supported in SIMD-only mode");
11751 }
11752 
/// emitForDispatchInit is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11753 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11754     CodeGenFunction &CGF, SourceLocation Loc,
11755     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11756     bool Ordered, const DispatchRTInput &DispatchValues) {
11757   llvm_unreachable("Not supported in SIMD-only mode");
11758 }
11759 
/// emitForStaticInit is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11760 void CGOpenMPSIMDRuntime::emitForStaticInit(
11761     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11762     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11763   llvm_unreachable("Not supported in SIMD-only mode");
11764 }
11765 
/// emitDistributeStaticInit is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11766 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11767     CodeGenFunction &CGF, SourceLocation Loc,
11768     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11769   llvm_unreachable("Not supported in SIMD-only mode");
11770 }
11771 
/// emitForOrderedIterationEnd is not supported in SIMD-only mode: the body is
/// a lone llvm_unreachable, so control is expected never to reach it.
11772 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11773                                                      SourceLocation Loc,
11774                                                      unsigned IVSize,
11775                                                      bool IVSigned) {
11776   llvm_unreachable("Not supported in SIMD-only mode");
11777 }
11778 
/// emitForStaticFinish is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11779 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11780                                               SourceLocation Loc,
11781                                               OpenMPDirectiveKind DKind) {
11782   llvm_unreachable("Not supported in SIMD-only mode");
11783 }
11784 
/// emitForNext is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it and no value is
/// ever returned.
11785 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11786                                               SourceLocation Loc,
11787                                               unsigned IVSize, bool IVSigned,
11788                                               Address IL, Address LB,
11789                                               Address UB, Address ST) {
11790   llvm_unreachable("Not supported in SIMD-only mode");
11791 }
11792 
/// emitNumThreadsClause is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11793 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11794                                                llvm::Value *NumThreads,
11795                                                SourceLocation Loc) {
11796   llvm_unreachable("Not supported in SIMD-only mode");
11797 }
11798 
/// emitProcBindClause is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11799 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11800                                              ProcBindKind ProcBind,
11801                                              SourceLocation Loc) {
11802   llvm_unreachable("Not supported in SIMD-only mode");
11803 }
11804 
/// getAddrOfThreadPrivate is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it and no
/// address is ever returned.
11805 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11806                                                     const VarDecl *VD,
11807                                                     Address VDAddr,
11808                                                     SourceLocation Loc) {
11809   llvm_unreachable("Not supported in SIMD-only mode");
11810 }
11811 
/// emitThreadPrivateVarDefinition is not supported in SIMD-only mode: the
/// body is a lone llvm_unreachable, so control is expected never to reach it.
11812 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11813     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11814     CodeGenFunction *CGF) {
11815   llvm_unreachable("Not supported in SIMD-only mode");
11816 }
11817 
/// getAddrOfArtificialThreadPrivate is not supported in SIMD-only mode: the
/// body is a lone llvm_unreachable, so control is expected never to reach it.
11818 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11819     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11820   llvm_unreachable("Not supported in SIMD-only mode");
11821 }
11822 
/// emitFlush is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11823 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11824                                     ArrayRef<const Expr *> Vars,
11825                                     SourceLocation Loc,
11826                                     llvm::AtomicOrdering AO) {
11827   llvm_unreachable("Not supported in SIMD-only mode");
11828 }
11829 
/// emitTaskCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11830 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11831                                        const OMPExecutableDirective &D,
11832                                        llvm::Function *TaskFunction,
11833                                        QualType SharedsTy, Address Shareds,
11834                                        const Expr *IfCond,
11835                                        const OMPTaskDataTy &Data) {
11836   llvm_unreachable("Not supported in SIMD-only mode");
11837 }
11838 
/// emitTaskLoopCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11839 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11840     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11841     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11842     const Expr *IfCond, const OMPTaskDataTy &Data) {
11843   llvm_unreachable("Not supported in SIMD-only mode");
11844 }
11845 
/// Emits a reduction in SIMD-only mode.
///
/// Asserts that \p Options.SimpleReduction is set and then forwards every
/// argument unchanged to CGOpenMPRuntime::emitReduction.
11846 void CGOpenMPSIMDRuntime::emitReduction(
11847     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
11848     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
11849     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11850   assert(Options.SimpleReduction && "Only simple reduction is expected.");
11851   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11852                                  ReductionOps, Options);
11853 }
11854 
/// emitTaskReductionInit is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11855 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
11856     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
11857     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11858   llvm_unreachable("Not supported in SIMD-only mode");
11859 }
11860 
/// emitTaskReductionFini is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11861 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
11862                                                 SourceLocation Loc,
11863                                                 bool IsWorksharingReduction) {
11864   llvm_unreachable("Not supported in SIMD-only mode");
11865 }
11866 
/// emitTaskReductionFixups is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11867 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11868                                                   SourceLocation Loc,
11869                                                   ReductionCodeGen &RCG,
11870                                                   unsigned N) {
11871   llvm_unreachable("Not supported in SIMD-only mode");
11872 }
11873 
/// getTaskReductionItem is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11874 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11875                                                   SourceLocation Loc,
11876                                                   llvm::Value *ReductionsPtr,
11877                                                   LValue SharedLVal) {
11878   llvm_unreachable("Not supported in SIMD-only mode");
11879 }
11880 
/// emitTaskwaitCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11881 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11882                                            SourceLocation Loc) {
11883   llvm_unreachable("Not supported in SIMD-only mode");
11884 }
11885 
/// emitCancellationPointCall is not supported in SIMD-only mode: the body is
/// a lone llvm_unreachable, so control is expected never to reach it.
11886 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11887     CodeGenFunction &CGF, SourceLocation Loc,
11888     OpenMPDirectiveKind CancelRegion) {
11889   llvm_unreachable("Not supported in SIMD-only mode");
11890 }
11891 
/// emitCancelCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11892 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11893                                          SourceLocation Loc, const Expr *IfCond,
11894                                          OpenMPDirectiveKind CancelRegion) {
11895   llvm_unreachable("Not supported in SIMD-only mode");
11896 }
11897 
/// emitTargetOutlinedFunction is not supported in SIMD-only mode: the body is
/// a lone llvm_unreachable, so control is expected never to reach it. In
/// particular \p OutlinedFn and \p OutlinedFnID are never written.
11898 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11899     const OMPExecutableDirective &D, StringRef ParentName,
11900     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11901     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11902   llvm_unreachable("Not supported in SIMD-only mode");
11903 }
11904 
/// emitTargetCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it and
/// \p SizeEmitter is never invoked.
11905 void CGOpenMPSIMDRuntime::emitTargetCall(
11906     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11907     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11908     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11909     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11910                                      const OMPLoopDirective &D)>
11911         SizeEmitter) {
11912   llvm_unreachable("Not supported in SIMD-only mode");
11913 }
11914 
/// emitTargetFunctions is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11915 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11916   llvm_unreachable("Not supported in SIMD-only mode");
11917 }
11918 
/// emitTargetGlobalVariable is not supported in SIMD-only mode: the body is a
/// lone llvm_unreachable, so control is expected never to reach it.
11919 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11920   llvm_unreachable("Not supported in SIMD-only mode");
11921 }
11922 
/// Always returns false without inspecting \p GD. Unlike its neighbours this
/// function can be called safely in SIMD-only mode.
/// NOTE(review): presumably 'false' tells the caller that the global was not
/// handled here - confirm against the declaration in the header.
11923 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11924   return false;
11925 }
11926 
/// emitTeamsCall is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11927 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11928                                         const OMPExecutableDirective &D,
11929                                         SourceLocation Loc,
11930                                         llvm::Function *OutlinedFn,
11931                                         ArrayRef<llvm::Value *> CapturedVars) {
11932   llvm_unreachable("Not supported in SIMD-only mode");
11933 }
11934 
/// emitNumTeamsClause is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11935 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11936                                              const Expr *NumTeams,
11937                                              const Expr *ThreadLimit,
11938                                              SourceLocation Loc) {
11939   llvm_unreachable("Not supported in SIMD-only mode");
11940 }
11941 
/// emitTargetDataCalls is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it and \p Info is
/// never modified.
11942 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11943     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11944     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11945   llvm_unreachable("Not supported in SIMD-only mode");
11946 }
11947 
/// emitTargetDataStandAloneCall is not supported in SIMD-only mode: the body
/// is a lone llvm_unreachable, so control is expected never to reach it.
11948 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11949     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11950     const Expr *Device) {
11951   llvm_unreachable("Not supported in SIMD-only mode");
11952 }
11953 
/// emitDoacrossInit is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11954 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11955                                            const OMPLoopDirective &D,
11956                                            ArrayRef<Expr *> NumIterations) {
11957   llvm_unreachable("Not supported in SIMD-only mode");
11958 }
11959 
/// emitDoacrossOrdered is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it.
11960 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11961                                               const OMPDependClause *C) {
11962   llvm_unreachable("Not supported in SIMD-only mode");
11963 }
11964 
/// translateParameter is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it and no
/// declaration is ever returned.
11965 const VarDecl *
11966 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
11967                                         const VarDecl *NativeParam) const {
11968   llvm_unreachable("Not supported in SIMD-only mode");
11969 }
11970 
/// getParameterAddress is not supported in SIMD-only mode: the body is a lone
/// llvm_unreachable, so control is expected never to reach it and no address
/// is ever returned.
11971 Address
11972 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
11973                                          const VarDecl *NativeParam,
11974                                          const VarDecl *TargetParam) const {
11975   llvm_unreachable("Not supported in SIMD-only mode");
11976 }
11977