xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 68d75eff68281c1b445e3010bb975eae07aac225)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region backed by a captured statement
  /// (outlined regions).
  /// \param CS Captured statement the region is emitted for.
  /// \param RegionKind Kind of the OpenMP region.
  /// \param CodeGen Callback used to emit the body of the region.
  /// \param Kind OpenMP directive that created the region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no captured statement (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op here; overridden
  /// by task-region subclasses that actually support untied tasks.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any capture info tagged CR_OpenMP is one of ours.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch machinery for untied tasks:
  /// a switch over the part id stored in \p PartIDVar selects the point at
  /// which execution of the task body resumes.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note the constructor negates its Tied arg).
    bool Untied;
    /// Variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Callback run at every task switching point (emitted before the
    /// return-to-runtime branch).
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch; cases are added lazily, one per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: the task is finished, branch back through
        // cleanups to the function return.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the next part id, run the
    /// user callback, return to the runtime, and register the resume block
    /// as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part id equals the current number of switch cases.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        // Fall through to the code that follows the switching point.
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param Action Untied-task action shared with the task emission code;
  /// held by reference, so it must outlive this region info.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate switching-point emission to the shared untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: an OpenMP region info of the 'task outlined' kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
/// Placeholder region-codegen callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// NOTE: intentionally shares the value 0x40 with OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /// Enable bitwise operators on this enum.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  /// Enable bitwise operators on this enum.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device IDs with a reserved meaning for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
485 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t structure above (used with GEP).
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
526 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule when none is specified: static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
569   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
741   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
742   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
756   OMPRTL__tgt_mapper_num_components,
757   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
758   // *base, void *begin, int64_t size, int64_t type);
759   OMPRTL__tgt_push_mapper_component,
760 };
761 
762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
763 /// region.
764 class CleanupTy final : public EHScopeStack::Cleanup {
765   PrePostActionTy *Action;
766 
767 public:
768   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
769   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
770     if (!CGF.HaveInsertPoint())
771       return;
772     Action->Exit(CGF);
773   }
774 };
775 
776 } // anonymous namespace
777 
778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
779   CodeGenFunction::RunCleanupsScope Scope(CGF);
780   if (PrePostAction) {
781     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
782     Callback(CodeGen, CGF, *PrePostAction);
783   } else {
784     PrePostActionTy Action;
785     Callback(CodeGen, CGF, Action);
786   }
787 }
788 
789 /// Check if the combiner is a call to UDR combiner and if it is so return the
790 /// UDR decl used for reduction.
791 static const OMPDeclareReductionDecl *
792 getReductionInit(const Expr *ReductionOp) {
793   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
794     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
795       if (const auto *DRE =
796               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
797         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
798           return DRD;
799   return nullptr;
800 }
801 
/// Emit initialization of a private reduction copy using the initializer of a
/// user-defined reduction (UDR), or a zero-initialized default when the UDR
/// declares no initializer expression.
/// \param DRD UDR declaration whose initializer (if any) is used.
/// \param InitOp Call expression that invokes the UDR initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) item.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an explicit initializer: emit the initializer call with its
    // two arguments mapped to the private and original addresses.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Privatize the two variables of the initializer call: the first maps to
    // the private copy, the second to the original item.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Reduction.second is the initializer function cached in UDRMap.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a null constant of the item's type in a
    // private constant global and copy it into the private address.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the null constant as an rvalue according to Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
853 
/// Emit initialization of arrays of complex types.
/// Emits a generated while-loop that initializes the destination array
/// element-by-element; when \p DRD is non-null the source array is walked in
/// lockstep so the original element is available to the UDR initializer.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit True if the declare-reduction initializer
/// must be used for each element instead of emitting \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null if not applicable.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer; only needed when the UDR
  // initializer reads the original element.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI over the destination element pointer.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Per-element cleanups must run before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // source pointer — looks like a copy-paste in the name only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
942 
/// Emit the lvalue of the shared (original) copy of a reduction item by
/// delegating to the generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
946 
947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
948                                             const Expr *E) {
949   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
950     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
951   return LValue();
952 }
953 
954 void ReductionCodeGen::emitAggregateInitialization(
955     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
956     const OMPDeclareReductionDecl *DRD) {
957   // Emit VarDecl with copy init for arrays.
958   // Get the address of the original variable captured in current
959   // captured region.
960   const auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   bool EmitDeclareReductionInit =
963       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
964   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
965                        EmitDeclareReductionInit,
966                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
967                                                 : PrivateVD->getInit(),
968                        DRD, SharedLVal.getAddress());
969 }
970 
971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
972                                    ArrayRef<const Expr *> Privates,
973                                    ArrayRef<const Expr *> ReductionOps) {
974   ClausesData.reserve(Shareds.size());
975   SharedAddresses.reserve(Shareds.size());
976   Sizes.reserve(Shareds.size());
977   BaseDecls.reserve(Shareds.size());
978   auto IPriv = Privates.begin();
979   auto IRed = ReductionOps.begin();
980   for (const Expr *Ref : Shareds) {
981     ClausesData.emplace_back(Ref, *IPriv, *IRed);
982     std::advance(IPriv, 1);
983     std::advance(IRed, 1);
984   }
985 }
986 
987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
988   assert(SharedAddresses.size() == N &&
989          "Number of generated lvalues must be exactly N.");
990   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
991   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
992   SharedAddresses.emplace_back(First, Second);
993 }
994 
/// Compute and record the size of the N-th reduction item in Sizes
/// (size-in-chars, element-count). For non-variably-modified types only the
/// constant type size is recorded; for VLA types the element count is derived
/// from the array-section bounds (or by dividing the type size) and bound to
/// the VLA size expression so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: size is known from the type; no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, computed from the section's bound
    // pointers; size in chars = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: divide the type size by the element size to get the count.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA's size expression so that
  // EmitVariablyModifiedType can materialize the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1032 
/// Variant of emitAggregateType that uses an externally supplied element
/// count \p Size (must be null for constant-sized items) instead of
/// recomputing it, and re-emits the variably-modified private type with it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Non-VLA items carry no runtime size; nothing to emit.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind Size to the VLA's size expression and re-emit the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1051 
/// Emit initialization of the N-th private reduction copy.
/// Dispatches, in order, to: element-wise aggregate init (arrays), the UDR
/// initializer, or the private variable's own initializer when \p DefaultInit
/// declines to handle it.
/// \param PrivateAddr Address of the private copy to initialize.
/// \param SharedLVal LValue of the original (shared) item.
/// \param DefaultInit Callback that may perform default initialization;
/// returning true suppresses emission of the private initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element-by-element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Use the UDR initializer (or its zero-init default).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1082 
1083 bool ReductionCodeGen::needCleanups(unsigned N) {
1084   const auto *PrivateVD =
1085       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1086   QualType PrivateType = PrivateVD->getType();
1087   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1088   return DTorKind != QualType::DK_none;
1089 }
1090 
1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1092                                     Address PrivateAddr) {
1093   const auto *PrivateVD =
1094       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1095   QualType PrivateType = PrivateVD->getType();
1096   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1097   if (needCleanups(N)) {
1098     PrivateAddr = CGF.Builder.CreateElementBitCast(
1099         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1100     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1101   }
1102 }
1103 
/// Dereference pointer/reference levels of \p BaseTy, loading through
/// \p BaseLV at each level, until the type matches \p ElTy (or no further
/// indirection remains); the resulting address is then recast to the memory
/// representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one level of indirection, as a pointer or as a reference.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1123 
/// Wrap \p Addr in a chain of stack temporaries mirroring the
/// pointer/reference levels of \p BaseTy down to \p ElTy, so the result can
/// be used where a value of type \p BaseTy is expected.
/// \returns the outermost temporary when any indirection level was created,
/// otherwise \p Addr itself (cast to \p BaseLVType) at \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Create one temporary per pointer/reference level, storing each new
  // temporary's address into the previous one.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the address into the innermost temporary and hand back the
    // outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1151 
1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1153   const VarDecl *OrigVD = nullptr;
1154   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1155     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1156     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1157       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1159       Base = TempASE->getBase()->IgnoreParenImpCasts();
1160     DE = cast<DeclRefExpr>(Base);
1161     OrigVD = cast<VarDecl>(DE->getDecl());
1162   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1163     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   }
1169   return OrigVD;
1170 }
1171 
/// For reduction items expressed as array sections/subscripts, shift
/// \p PrivateAddr so it corresponds to the item's base variable:
/// private_base = private + (orig_base - shared_lower_bound), re-wrapped in
/// the indirection structure of the base variable's type. Plain variable
/// references are returned unchanged. Records the base declaration of item N
/// in BaseDecls in both cases.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the base variable, dereferenced down to the element type of
    // the shared item.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the base from the section's lower bound, in elements.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Re-wrap the shifted pointer into the base variable's type structure.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1197 
1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1199   const OMPDeclareReductionDecl *DRD =
1200       getReductionInit(ClausesData[N].ReductionOp);
1201   return DRD && DRD->getInitializer();
1202 }
1203 
/// The thread-id variable of this region has pointer type; load through it to
/// obtain an lvalue for the underlying value.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1209 
/// Emit the captured region body inside a terminate scope, enforcing the
/// single-entry/single-exit semantics of an OpenMP structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Pushing a terminate handler means exceptions escaping the block
  // terminate rather than unwind out of it.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1222 
/// Unlike the base region, the task-outlined region's thread-id variable is
/// not pointer-typed here, so its local storage is used directly as the
/// lvalue (no load through a pointer).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1229 
1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1231                                        QualType FieldTy) {
1232   auto *Field = FieldDecl::Create(
1233       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1234       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1235       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1236   Field->setAccess(AS_public);
1237   DC->addDecl(Field);
1238   return Field;
1239 }
1240 
/// Construct the runtime helper: builds the implicit ident_t record used for
/// source-location arguments of runtime calls, the critical-name array type,
/// and loads any previously recorded offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the ident_t record. NOTE(review): the field order presumably
  // mirrors the libomp runtime's ident_t layout — do not reorder without
  // checking the runtime.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical-section names are arrays of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1266 
/// Try to redirect \p OldGD to the definition of \p NewGD by emitting a
/// global alias carrying OldGD's mangled name.
/// \param OrigAddr Existing declaration with OldGD's name, if any; it is
/// replaced by the alias and erased.
/// \param IsForDefinition True when a definition of the aliasee must be
/// forced.
/// \returns true if the alias was created; false when \p NewGD has no
/// definition available yet.
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  // Only alias to an actual definition; a mere declaration cannot be aliased.
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value.  This ensures that it is emitted
    // if a deferred decl.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      // Steal the name of the existing declaration, rewrite its uses to the
      // alias, and drop the now-dead declaration.
      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
1312 
1313 void CGOpenMPRuntime::clear() {
1314   InternalVars.clear();
1315   // Clean non-target variable declarations possibly used only in debug info.
1316   for (const auto &Data : EmittedNonTargetVariables) {
1317     if (!Data.getValue().pointsToAliveValue())
1318       continue;
1319     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1320     if (!GV)
1321       continue;
1322     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1323       continue;
1324     GV->eraseFromParent();
1325   }
1326   // Emit aliases for the deferred aliasees.
1327   for (const auto &Pair : DeferredVariantFunction) {
1328     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1329     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1330     // If not able to emit alias, just emit original declaration.
1331     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1332                                 /*IsForDefinition=*/false);
1333   }
1334 }
1335 
1336 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1337   SmallString<128> Buffer;
1338   llvm::raw_svector_ostream OS(Buffer);
1339   StringRef Sep = FirstSeparator;
1340   for (StringRef Part : Parts) {
1341     OS << Sep << Part;
1342     Sep = Separator;
1343   }
1344   return OS.str();
1345 }
1346 
/// Emit the outlined helper function for a declare-reduction combiner or
/// initializer:
///   void .omp_combiner.(Ty *omp_out, Ty *omp_in)       when IsCombiner, or
///   void .omp_initializer.(Ty *omp_priv, Ty *omp_orig) otherwise.
/// \param CombinerInitializer Expression to emit inside the helper; may be
/// null for initializers that only copy-initialize the 'out' variable.
/// \param In Variable bound to the second (in/orig) parameter.
/// \param Out Variable bound to the first (out/priv) parameter.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in); — the 'out' parameter comes first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the helper to be inlined in optimized builds.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, copy-initialize the 'out' variable from its own
  // non-trivial initializer before emitting the optional expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1403 
/// Emit (at most once) the combiner and optional initializer functions for
/// the given declare-reduction declaration and cache them in UDRMap. When
/// \p CGF is non-null, the declaration is also associated with the current
/// function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression to the helper;
    // otherwise the helper copy-initializes the 'priv' variable itself.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1429 
1430 std::pair<llvm::Function *, llvm::Function *>
1431 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1432   auto I = UDRMap.find(D);
1433   if (I != UDRMap.end())
1434     return I->second;
1435   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1436   return UDRMap.lookup(D);
1437 }
1438 
1439 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1440     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1441     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1442     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1443   assert(ThreadIDVar->getType()->isPointerType() &&
1444          "thread id variable must be of type kmp_int32 *");
1445   CodeGenFunction CGF(CGM, true);
1446   bool HasCancel = false;
1447   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1448     HasCancel = OPD->hasCancel();
1449   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1450     HasCancel = OPSD->hasCancel();
1451   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1452     HasCancel = OPFD->hasCancel();
1453   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1454     HasCancel = OPFD->hasCancel();
1455   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1456     HasCancel = OPFD->hasCancel();
1457   else if (const auto *OPFD =
1458                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1459     HasCancel = OPFD->hasCancel();
1460   else if (const auto *OPFD =
1461                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1462     HasCancel = OPFD->hasCancel();
1463   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1464                                     HasCancel, OutlinedHelperName);
1465   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1466   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1467 }
1468 
1469 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1470     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1471     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1472   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1473   return emitParallelOrTeamsOutlinedFunction(
1474       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1475 }
1476 
1477 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1478     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1479     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1480   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1481   return emitParallelOrTeamsOutlinedFunction(
1482       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1483 }
1484 
1485 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1486     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1487     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1488     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1489     bool Tied, unsigned &NumberOfParts) {
1490   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1491                                               PrePostActionTy &) {
1492     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1493     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1494     llvm::Value *TaskArgs[] = {
1495         UpLoc, ThreadID,
1496         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1497                                     TaskTVar->getType()->castAs<PointerType>())
1498             .getPointer()};
1499     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1500   };
1501   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1502                                                             UntiedCodeGen);
1503   CodeGen.setAction(Action);
1504   assert(!ThreadIDVar->getType()->isPointerType() &&
1505          "thread id variable must be of type kmp_int32 for tasks");
1506   const OpenMPDirectiveKind Region =
1507       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1508                                                       : OMPD_task;
1509   const CapturedStmt *CS = D.getCapturedStmt(Region);
1510   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1511   CodeGenFunction CGF(CGM, true);
1512   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1513                                         InnermostKind,
1514                                         TD ? TD->hasCancel() : false, Action);
1515   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1516   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1517   if (!Tied)
1518     NumberOfParts = Action.getNumberOfParts();
1519   return Res;
1520 }
1521 
1522 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1523                              const RecordDecl *RD, const CGRecordLayout &RL,
1524                              ArrayRef<llvm::Constant *> Data) {
1525   llvm::StructType *StructTy = RL.getLLVMType();
1526   unsigned PrevIdx = 0;
1527   ConstantInitBuilder CIBuilder(CGM);
1528   auto DI = Data.begin();
1529   for (const FieldDecl *FD : RD->fields()) {
1530     unsigned Idx = RL.getLLVMFieldNo(FD);
1531     // Fill the alignment.
1532     for (unsigned I = PrevIdx; I < Idx; ++I)
1533       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1534     PrevIdx = Idx + 1;
1535     Fields.add(*DI);
1536     ++DI;
1537   }
1538 }
1539 
1540 template <class... As>
1541 static llvm::GlobalVariable *
1542 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1543                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1544                    As &&... Args) {
1545   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1546   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1547   ConstantInitBuilder CIBuilder(CGM);
1548   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1549   buildStructValue(Fields, CGM, RD, RL, Data);
1550   return Fields.finishAndCreateGlobal(
1551       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1552       std::forward<As>(Args)...);
1553 }
1554 
1555 template <typename T>
1556 static void
1557 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1558                                          ArrayRef<llvm::Constant *> Data,
1559                                          T &Parent) {
1560   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1561   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1562   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1563   buildStructValue(Fields, CGM, RD, RL, Data);
1564   Fields.finishAndAddTo(Parent);
1565 }
1566 
1567 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1568   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1569   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1570   FlagsTy FlagsKey(Flags, Reserved2Flags);
1571   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1572   if (!Entry) {
1573     if (!DefaultOpenMPPSource) {
1574       // Initialize default location for psource field of ident_t structure of
1575       // all ident_t objects. Format is ";file;function;line;column;;".
1576       // Taken from
1577       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1578       DefaultOpenMPPSource =
1579           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1580       DefaultOpenMPPSource =
1581           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1582     }
1583 
1584     llvm::Constant *Data[] = {
1585         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1586         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1587         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1588         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1589     llvm::GlobalValue *DefaultOpenMPLocation =
1590         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1591                            llvm::GlobalValue::PrivateLinkage);
1592     DefaultOpenMPLocation->setUnnamedAddr(
1593         llvm::GlobalValue::UnnamedAddr::Global);
1594 
1595     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1596   }
1597   return Address(Entry, Align);
1598 }
1599 
1600 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1601                                              bool AtCurrentPoint) {
1602   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1603   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1604 
1605   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1606   if (AtCurrentPoint) {
1607     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1608         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1609   } else {
1610     Elem.second.ServiceInsertPt =
1611         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1612     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1613   }
1614 }
1615 
1616 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1617   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1618   if (Elem.second.ServiceInsertPt) {
1619     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1620     Elem.second.ServiceInsertPt = nullptr;
1621     Ptr->eraseFromParent();
1622   }
1623 }
1624 
// Returns an ident_t* describing source location \p Loc, suitable as the
// 'loc' argument of OpenMP runtime entry points.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse the per-function ident_t temporary if one was created already.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;" - a function-local copy of the
    // default location whose psource field is overwritten per call site below.
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the copy at the service insertion point (after the allocas,
    // near the function entry) rather than at the current insertion point.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source-location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1685 
// Returns the OpenMP global thread id for the current function, caching it in
// OpenMPLocThreadIDMap when it is safe to reuse across the whole function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread-id variable only when C++ exceptions cannot
      // interfere: no landing pad is required, or the current block / the
      // variable's defining block is the entry block or matches the current
      // insertion block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer()) ||
          cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer())->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point (near the function entry)
  // instead of the current insertion point so the cached value is available
  // throughout the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1741 
1742 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1743   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1744   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1745     clearLocThreadIdInsertPt(CGF);
1746     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1747   }
1748   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1749     for(auto *D : FunctionUDRMap[CGF.CurFn])
1750       UDRMap.erase(D);
1751     FunctionUDRMap.erase(CGF.CurFn);
1752   }
1753   auto I = FunctionUDMMap.find(CGF.CurFn);
1754   if (I != FunctionUDMMap.end()) {
1755     for(auto *D : I->second)
1756       UDMMap.erase(D);
1757     FunctionUDMMap.erase(I);
1758   }
1759 }
1760 
1761 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1762   return IdentTy->getPointerTo();
1763 }
1764 
1765 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1766   if (!Kmpc_MicroTy) {
1767     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1768     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1769                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1770     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1771   }
1772   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1773 }
1774 
1775 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1776   llvm::FunctionCallee RTLFn = nullptr;
1777   switch (static_cast<OpenMPRTLFunction>(Function)) {
1778   case OMPRTL__kmpc_fork_call: {
1779     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1780     // microtask, ...);
1781     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1782                                 getKmpc_MicroPointerTy()};
1783     auto *FnTy =
1784         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1785     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1786     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1787       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1788         llvm::LLVMContext &Ctx = F->getContext();
1789         llvm::MDBuilder MDB(Ctx);
1790         // Annotate the callback behavior of the __kmpc_fork_call:
1791         //  - The callback callee is argument number 2 (microtask).
1792         //  - The first two arguments of the callback callee are unknown (-1).
1793         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1794         //    callback callee.
1795         F->addMetadata(
1796             llvm::LLVMContext::MD_callback,
1797             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1798                                         2, {-1, -1},
1799                                         /* VarArgsArePassed */ true)}));
1800       }
1801     }
1802     break;
1803   }
1804   case OMPRTL__kmpc_global_thread_num: {
1805     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1806     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1807     auto *FnTy =
1808         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1809     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1810     break;
1811   }
1812   case OMPRTL__kmpc_threadprivate_cached: {
1813     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1814     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1816                                 CGM.VoidPtrTy, CGM.SizeTy,
1817                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1818     auto *FnTy =
1819         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1820     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1821     break;
1822   }
1823   case OMPRTL__kmpc_critical: {
1824     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1825     // kmp_critical_name *crit);
1826     llvm::Type *TypeParams[] = {
1827         getIdentTyPointerTy(), CGM.Int32Ty,
1828         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1829     auto *FnTy =
1830         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1831     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1832     break;
1833   }
1834   case OMPRTL__kmpc_critical_with_hint: {
1835     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1836     // kmp_critical_name *crit, uintptr_t hint);
1837     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1838                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1839                                 CGM.IntPtrTy};
1840     auto *FnTy =
1841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1843     break;
1844   }
1845   case OMPRTL__kmpc_threadprivate_register: {
1846     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1847     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1848     // typedef void *(*kmpc_ctor)(void *);
1849     auto *KmpcCtorTy =
1850         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1851                                 /*isVarArg*/ false)->getPointerTo();
1852     // typedef void *(*kmpc_cctor)(void *, void *);
1853     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1854     auto *KmpcCopyCtorTy =
1855         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1856                                 /*isVarArg*/ false)
1857             ->getPointerTo();
1858     // typedef void (*kmpc_dtor)(void *);
1859     auto *KmpcDtorTy =
1860         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1861             ->getPointerTo();
1862     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1863                               KmpcCopyCtorTy, KmpcDtorTy};
1864     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1865                                         /*isVarArg*/ false);
1866     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1867     break;
1868   }
1869   case OMPRTL__kmpc_end_critical: {
1870     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1871     // kmp_critical_name *crit);
1872     llvm::Type *TypeParams[] = {
1873         getIdentTyPointerTy(), CGM.Int32Ty,
1874         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1875     auto *FnTy =
1876         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1877     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1878     break;
1879   }
1880   case OMPRTL__kmpc_cancel_barrier: {
1881     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1882     // global_tid);
1883     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1884     auto *FnTy =
1885         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1886     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1887     break;
1888   }
1889   case OMPRTL__kmpc_barrier: {
1890     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1891     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1892     auto *FnTy =
1893         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1894     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1895     break;
1896   }
1897   case OMPRTL__kmpc_for_static_fini: {
1898     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1899     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1900     auto *FnTy =
1901         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1902     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1903     break;
1904   }
1905   case OMPRTL__kmpc_push_num_threads: {
1906     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1907     // kmp_int32 num_threads)
1908     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1909                                 CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1913     break;
1914   }
1915   case OMPRTL__kmpc_serialized_parallel: {
1916     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1917     // global_tid);
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1919     auto *FnTy =
1920         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1921     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1922     break;
1923   }
1924   case OMPRTL__kmpc_end_serialized_parallel: {
1925     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1926     // global_tid);
1927     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1928     auto *FnTy =
1929         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1930     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1931     break;
1932   }
1933   case OMPRTL__kmpc_flush: {
1934     // Build void __kmpc_flush(ident_t *loc);
1935     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1936     auto *FnTy =
1937         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1938     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1939     break;
1940   }
1941   case OMPRTL__kmpc_master: {
1942     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1946     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1947     break;
1948   }
1949   case OMPRTL__kmpc_end_master: {
1950     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1951     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1952     auto *FnTy =
1953         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1954     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1955     break;
1956   }
1957   case OMPRTL__kmpc_omp_taskyield: {
1958     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1959     // int end_part);
1960     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1961     auto *FnTy =
1962         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1963     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1964     break;
1965   }
1966   case OMPRTL__kmpc_single: {
1967     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1968     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1969     auto *FnTy =
1970         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1971     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1972     break;
1973   }
1974   case OMPRTL__kmpc_end_single: {
1975     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1976     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1980     break;
1981   }
1982   case OMPRTL__kmpc_omp_task_alloc: {
1983     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1984     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1985     // kmp_routine_entry_t *task_entry);
1986     assert(KmpRoutineEntryPtrTy != nullptr &&
1987            "Type kmp_routine_entry_t must be created.");
1988     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1989                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1990     // Return void * and then cast to particular kmp_task_t type.
1991     auto *FnTy =
1992         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1993     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1994     break;
1995   }
1996   case OMPRTL__kmpc_omp_target_task_alloc: {
1997     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1998     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1999     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2000     assert(KmpRoutineEntryPtrTy != nullptr &&
2001            "Type kmp_routine_entry_t must be created.");
2002     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2003                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2004                                 CGM.Int64Ty};
2005     // Return void * and then cast to particular kmp_task_t type.
2006     auto *FnTy =
2007         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2008     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2009     break;
2010   }
2011   case OMPRTL__kmpc_omp_task: {
2012     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2013     // *new_task);
2014     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2015                                 CGM.VoidPtrTy};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2019     break;
2020   }
2021   case OMPRTL__kmpc_copyprivate: {
2022     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2023     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2024     // kmp_int32 didit);
2025     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2026     auto *CpyFnTy =
2027         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2029                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2030                                 CGM.Int32Ty};
2031     auto *FnTy =
2032         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2034     break;
2035   }
2036   case OMPRTL__kmpc_reduce: {
2037     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2038     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2039     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2040     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2041     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2042                                                /*isVarArg=*/false);
2043     llvm::Type *TypeParams[] = {
2044         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2045         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2046         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2050     break;
2051   }
2052   case OMPRTL__kmpc_reduce_nowait: {
2053     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2054     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2055     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2056     // *lck);
2057     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2058     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2059                                                /*isVarArg=*/false);
2060     llvm::Type *TypeParams[] = {
2061         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2062         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2063         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2064     auto *FnTy =
2065         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2066     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2067     break;
2068   }
2069   case OMPRTL__kmpc_end_reduce: {
2070     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2071     // kmp_critical_name *lck);
2072     llvm::Type *TypeParams[] = {
2073         getIdentTyPointerTy(), CGM.Int32Ty,
2074         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2075     auto *FnTy =
2076         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2077     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2078     break;
2079   }
2080   case OMPRTL__kmpc_end_reduce_nowait: {
2081     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2082     // kmp_critical_name *lck);
2083     llvm::Type *TypeParams[] = {
2084         getIdentTyPointerTy(), CGM.Int32Ty,
2085         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2086     auto *FnTy =
2087         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2088     RTLFn =
2089         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2090     break;
2091   }
2092   case OMPRTL__kmpc_omp_task_begin_if0: {
2093     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2094     // *new_task);
2095     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2096                                 CGM.VoidPtrTy};
2097     auto *FnTy =
2098         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2099     RTLFn =
2100         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2101     break;
2102   }
2103   case OMPRTL__kmpc_omp_task_complete_if0: {
2104     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2105     // *new_task);
2106     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2107                                 CGM.VoidPtrTy};
2108     auto *FnTy =
2109         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2110     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2111                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2112     break;
2113   }
2114   case OMPRTL__kmpc_ordered: {
2115     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2116     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2117     auto *FnTy =
2118         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2119     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2120     break;
2121   }
2122   case OMPRTL__kmpc_end_ordered: {
2123     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2124     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2125     auto *FnTy =
2126         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2127     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2128     break;
2129   }
2130   case OMPRTL__kmpc_omp_taskwait: {
2131     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2132     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2136     break;
2137   }
2138   case OMPRTL__kmpc_taskgroup: {
2139     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2140     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2141     auto *FnTy =
2142         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2144     break;
2145   }
2146   case OMPRTL__kmpc_end_taskgroup: {
2147     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2148     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2149     auto *FnTy =
2150         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2151     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2152     break;
2153   }
2154   case OMPRTL__kmpc_push_proc_bind: {
2155     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2156     // int proc_bind)
2157     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2158     auto *FnTy =
2159         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2160     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2161     break;
2162   }
2163   case OMPRTL__kmpc_omp_task_with_deps: {
2164     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2165     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2166     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2167     llvm::Type *TypeParams[] = {
2168         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2169         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2170     auto *FnTy =
2171         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2172     RTLFn =
2173         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2174     break;
2175   }
2176   case OMPRTL__kmpc_omp_wait_deps: {
2177     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2178     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2179     // kmp_depend_info_t *noalias_dep_list);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2181                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2182                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2183     auto *FnTy =
2184         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2185     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2186     break;
2187   }
2188   case OMPRTL__kmpc_cancellationpoint: {
2189     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2190     // global_tid, kmp_int32 cncl_kind)
2191     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2192     auto *FnTy =
2193         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2194     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2195     break;
2196   }
2197   case OMPRTL__kmpc_cancel: {
2198     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2199     // kmp_int32 cncl_kind)
2200     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2201     auto *FnTy =
2202         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2203     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2204     break;
2205   }
2206   case OMPRTL__kmpc_push_num_teams: {
2207     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2208     // kmp_int32 num_teams, kmp_int32 num_threads)
2209     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2210         CGM.Int32Ty};
2211     auto *FnTy =
2212         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2213     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2214     break;
2215   }
2216   case OMPRTL__kmpc_fork_teams: {
2217     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2218     // microtask, ...);
2219     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2220                                 getKmpc_MicroPointerTy()};
2221     auto *FnTy =
2222         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2223     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2224     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2225       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2226         llvm::LLVMContext &Ctx = F->getContext();
2227         llvm::MDBuilder MDB(Ctx);
2228         // Annotate the callback behavior of the __kmpc_fork_teams:
2229         //  - The callback callee is argument number 2 (microtask).
2230         //  - The first two arguments of the callback callee are unknown (-1).
2231         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2232         //    callback callee.
2233         F->addMetadata(
2234             llvm::LLVMContext::MD_callback,
2235             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2236                                         2, {-1, -1},
2237                                         /* VarArgsArePassed */ true)}));
2238       }
2239     }
2240     break;
2241   }
2242   case OMPRTL__kmpc_taskloop: {
2243     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2244     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2245     // sched, kmp_uint64 grainsize, void *task_dup);
2246     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2247                                 CGM.IntTy,
2248                                 CGM.VoidPtrTy,
2249                                 CGM.IntTy,
2250                                 CGM.Int64Ty->getPointerTo(),
2251                                 CGM.Int64Ty->getPointerTo(),
2252                                 CGM.Int64Ty,
2253                                 CGM.IntTy,
2254                                 CGM.IntTy,
2255                                 CGM.Int64Ty,
2256                                 CGM.VoidPtrTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2259     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2260     break;
2261   }
2262   case OMPRTL__kmpc_doacross_init: {
2263     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2264     // num_dims, struct kmp_dim *dims);
2265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2266                                 CGM.Int32Ty,
2267                                 CGM.Int32Ty,
2268                                 CGM.VoidPtrTy};
2269     auto *FnTy =
2270         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2271     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2272     break;
2273   }
2274   case OMPRTL__kmpc_doacross_fini: {
2275     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2276     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2277     auto *FnTy =
2278         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2279     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2280     break;
2281   }
2282   case OMPRTL__kmpc_doacross_post: {
2283     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2284     // *vec);
2285     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2286                                 CGM.Int64Ty->getPointerTo()};
2287     auto *FnTy =
2288         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2289     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2290     break;
2291   }
2292   case OMPRTL__kmpc_doacross_wait: {
2293     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2294     // *vec);
2295     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2296                                 CGM.Int64Ty->getPointerTo()};
2297     auto *FnTy =
2298         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2299     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2300     break;
2301   }
2302   case OMPRTL__kmpc_task_reduction_init: {
2303     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2304     // *data);
2305     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2306     auto *FnTy =
2307         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2308     RTLFn =
2309         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2310     break;
2311   }
2312   case OMPRTL__kmpc_task_reduction_get_th_data: {
2313     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2314     // *d);
2315     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2316     auto *FnTy =
2317         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2318     RTLFn = CGM.CreateRuntimeFunction(
2319         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2320     break;
2321   }
2322   case OMPRTL__kmpc_alloc: {
2323     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2324     // al); omp_allocator_handle_t type is void *.
2325     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2326     auto *FnTy =
2327         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2328     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2329     break;
2330   }
2331   case OMPRTL__kmpc_free: {
2332     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2333     // al); omp_allocator_handle_t type is void *.
2334     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2338     break;
2339   }
2340   case OMPRTL__kmpc_push_target_tripcount: {
2341     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2342     // size);
2343     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2344     llvm::FunctionType *FnTy =
2345         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2346     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2347     break;
2348   }
2349   case OMPRTL__tgt_target: {
2350     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2351     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2352     // *arg_types);
2353     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2354                                 CGM.VoidPtrTy,
2355                                 CGM.Int32Ty,
2356                                 CGM.VoidPtrPtrTy,
2357                                 CGM.VoidPtrPtrTy,
2358                                 CGM.Int64Ty->getPointerTo(),
2359                                 CGM.Int64Ty->getPointerTo()};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2363     break;
2364   }
2365   case OMPRTL__tgt_target_nowait: {
2366     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2367     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2368     // int64_t *arg_types);
2369     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2370                                 CGM.VoidPtrTy,
2371                                 CGM.Int32Ty,
2372                                 CGM.VoidPtrPtrTy,
2373                                 CGM.VoidPtrPtrTy,
2374                                 CGM.Int64Ty->getPointerTo(),
2375                                 CGM.Int64Ty->getPointerTo()};
2376     auto *FnTy =
2377         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2378     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2379     break;
2380   }
2381   case OMPRTL__tgt_target_teams: {
2382     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2383     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2384     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2385     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2386                                 CGM.VoidPtrTy,
2387                                 CGM.Int32Ty,
2388                                 CGM.VoidPtrPtrTy,
2389                                 CGM.VoidPtrPtrTy,
2390                                 CGM.Int64Ty->getPointerTo(),
2391                                 CGM.Int64Ty->getPointerTo(),
2392                                 CGM.Int32Ty,
2393                                 CGM.Int32Ty};
2394     auto *FnTy =
2395         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2396     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2397     break;
2398   }
2399   case OMPRTL__tgt_target_teams_nowait: {
2400     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2401     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2402     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2403     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2404                                 CGM.VoidPtrTy,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo(),
2410                                 CGM.Int32Ty,
2411                                 CGM.Int32Ty};
2412     auto *FnTy =
2413         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2414     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2415     break;
2416   }
2417   case OMPRTL__tgt_register_requires: {
2418     // Build void __tgt_register_requires(int64_t flags);
2419     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2420     auto *FnTy =
2421         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2422     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2423     break;
2424   }
2425   case OMPRTL__tgt_register_lib: {
2426     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2427     QualType ParamTy =
2428         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2429     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2430     auto *FnTy =
2431         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2433     break;
2434   }
2435   case OMPRTL__tgt_unregister_lib: {
2436     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2437     QualType ParamTy =
2438         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2439     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2440     auto *FnTy =
2441         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2442     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2443     break;
2444   }
2445   case OMPRTL__tgt_target_data_begin: {
2446     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2447     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2448     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2449                                 CGM.Int32Ty,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.VoidPtrPtrTy,
2452                                 CGM.Int64Ty->getPointerTo(),
2453                                 CGM.Int64Ty->getPointerTo()};
2454     auto *FnTy =
2455         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2456     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2457     break;
2458   }
2459   case OMPRTL__tgt_target_data_begin_nowait: {
2460     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2461     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2462     // *arg_types);
2463     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2464                                 CGM.Int32Ty,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.VoidPtrPtrTy,
2467                                 CGM.Int64Ty->getPointerTo(),
2468                                 CGM.Int64Ty->getPointerTo()};
2469     auto *FnTy =
2470         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2471     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2472     break;
2473   }
2474   case OMPRTL__tgt_target_data_end: {
2475     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2476     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2477     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2478                                 CGM.Int32Ty,
2479                                 CGM.VoidPtrPtrTy,
2480                                 CGM.VoidPtrPtrTy,
2481                                 CGM.Int64Ty->getPointerTo(),
2482                                 CGM.Int64Ty->getPointerTo()};
2483     auto *FnTy =
2484         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2485     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2486     break;
2487   }
2488   case OMPRTL__tgt_target_data_end_nowait: {
2489     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2490     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2491     // *arg_types);
2492     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2493                                 CGM.Int32Ty,
2494                                 CGM.VoidPtrPtrTy,
2495                                 CGM.VoidPtrPtrTy,
2496                                 CGM.Int64Ty->getPointerTo(),
2497                                 CGM.Int64Ty->getPointerTo()};
2498     auto *FnTy =
2499         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2500     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2501     break;
2502   }
2503   case OMPRTL__tgt_target_data_update: {
2504     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2505     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2506     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2507                                 CGM.Int32Ty,
2508                                 CGM.VoidPtrPtrTy,
2509                                 CGM.VoidPtrPtrTy,
2510                                 CGM.Int64Ty->getPointerTo(),
2511                                 CGM.Int64Ty->getPointerTo()};
2512     auto *FnTy =
2513         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2514     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2515     break;
2516   }
2517   case OMPRTL__tgt_target_data_update_nowait: {
2518     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2519     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2520     // *arg_types);
2521     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2522                                 CGM.Int32Ty,
2523                                 CGM.VoidPtrPtrTy,
2524                                 CGM.VoidPtrPtrTy,
2525                                 CGM.Int64Ty->getPointerTo(),
2526                                 CGM.Int64Ty->getPointerTo()};
2527     auto *FnTy =
2528         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2530     break;
2531   }
2532   case OMPRTL__tgt_mapper_num_components: {
2533     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2534     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2535     auto *FnTy =
2536         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2537     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2538     break;
2539   }
2540   case OMPRTL__tgt_push_mapper_component: {
2541     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2542     // *base, void *begin, int64_t size, int64_t type);
2543     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2544                                 CGM.Int64Ty, CGM.Int64Ty};
2545     auto *FnTy =
2546         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2547     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2548     break;
2549   }
2550   }
2551   assert(RTLFn && "Unable to find OpenMP runtime function");
2552   return RTLFn;
2553 }
2554 
2555 llvm::FunctionCallee
2556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2557   assert((IVSize == 32 || IVSize == 64) &&
2558          "IV size is not compatible with the omp runtime");
2559   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2560                                             : "__kmpc_for_static_init_4u")
2561                                 : (IVSigned ? "__kmpc_for_static_init_8"
2562                                             : "__kmpc_for_static_init_8u");
2563   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2564   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2565   llvm::Type *TypeParams[] = {
2566     getIdentTyPointerTy(),                     // loc
2567     CGM.Int32Ty,                               // tid
2568     CGM.Int32Ty,                               // schedtype
2569     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2570     PtrTy,                                     // p_lower
2571     PtrTy,                                     // p_upper
2572     PtrTy,                                     // p_stride
2573     ITy,                                       // incr
2574     ITy                                        // chunk
2575   };
2576   auto *FnTy =
2577       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2578   return CGM.CreateRuntimeFunction(FnTy, Name);
2579 }
2580 
2581 llvm::FunctionCallee
2582 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2583   assert((IVSize == 32 || IVSize == 64) &&
2584          "IV size is not compatible with the omp runtime");
2585   StringRef Name =
2586       IVSize == 32
2587           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2588           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2589   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2590   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2591                                CGM.Int32Ty,           // tid
2592                                CGM.Int32Ty,           // schedtype
2593                                ITy,                   // lower
2594                                ITy,                   // upper
2595                                ITy,                   // stride
2596                                ITy                    // chunk
2597   };
2598   auto *FnTy =
2599       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2600   return CGM.CreateRuntimeFunction(FnTy, Name);
2601 }
2602 
2603 llvm::FunctionCallee
2604 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2605   assert((IVSize == 32 || IVSize == 64) &&
2606          "IV size is not compatible with the omp runtime");
2607   StringRef Name =
2608       IVSize == 32
2609           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2610           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2611   llvm::Type *TypeParams[] = {
2612       getIdentTyPointerTy(), // loc
2613       CGM.Int32Ty,           // tid
2614   };
2615   auto *FnTy =
2616       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2617   return CGM.CreateRuntimeFunction(FnTy, Name);
2618 }
2619 
2620 llvm::FunctionCallee
2621 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2622   assert((IVSize == 32 || IVSize == 64) &&
2623          "IV size is not compatible with the omp runtime");
2624   StringRef Name =
2625       IVSize == 32
2626           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2627           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2628   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2629   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2630   llvm::Type *TypeParams[] = {
2631     getIdentTyPointerTy(),                     // loc
2632     CGM.Int32Ty,                               // tid
2633     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2634     PtrTy,                                     // p_lower
2635     PtrTy,                                     // p_upper
2636     PtrTy                                      // p_stride
2637   };
2638   auto *FnTy =
2639       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2640   return CGM.CreateRuntimeFunction(FnTy, Name);
2641 }
2642 
2643 /// Obtain information that uniquely identifies a target entry. This
2644 /// consists of the file and device IDs as well as line number associated with
2645 /// the relevant entry source location.
2646 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2647                                      unsigned &DeviceID, unsigned &FileID,
2648                                      unsigned &LineNum) {
2649   SourceManager &SM = C.getSourceManager();
2650 
2651   // The loc should be always valid and have a file ID (the user cannot use
2652   // #pragma directives in macros)
2653 
2654   assert(Loc.isValid() && "Source location is expected to be always valid.");
2655 
2656   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2657   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2658 
2659   llvm::sys::fs::UniqueID ID;
2660   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2661     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2662         << PLoc.getFilename() << EC.message();
2663 
2664   DeviceID = ID.getDevice();
2665   FileID = ID.getFile();
2666   LineNum = PLoc.getLine();
2667 }
2668 
// Returns the address of the indirection pointer used to reference a
// 'declare target' variable, creating that pointer lazily. Applies to
// 'declare target link' variables and, under unified shared memory, to
// 'declare target to' variables; returns an invalid Address otherwise.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no device code is emitted, so no reference pointer is
  // needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name "<mangled var>[_<fileID>]_decl_tgt_ref_ptr". The scope
    // ensures the stream is destroyed (and PtrName fully written) before use.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get the file ID mixed into the name so
        // same-named variables from different TUs do not collide.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse the pointer if it was already emitted for this module.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device it is left for the runtime/offload machinery to set up.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2707 
2708 llvm::Constant *
2709 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2710   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2711          !CGM.getContext().getTargetInfo().isTLSSupported());
2712   // Lookup the entry, lazily creating it if necessary.
2713   std::string Suffix = getName({"cache", ""});
2714   return getOrCreateInternalVariable(
2715       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2716 }
2717 
2718 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2719                                                 const VarDecl *VD,
2720                                                 Address VDAddr,
2721                                                 SourceLocation Loc) {
2722   if (CGM.getLangOpts().OpenMPUseTLS &&
2723       CGM.getContext().getTargetInfo().isTLSSupported())
2724     return VDAddr;
2725 
2726   llvm::Type *VarTy = VDAddr.getElementType();
2727   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2728                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2729                                                        CGM.Int8PtrTy),
2730                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2731                          getOrCreateThreadPrivateCache(VD)};
2732   return Address(CGF.EmitRuntimeCall(
2733       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2734                  VDAddr.getAlignment());
2735 }
2736 
2737 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2738     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2739     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2740   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2741   // library.
2742   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2743   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2744                       OMPLoc);
2745   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2746   // to register constructor/destructor for variable.
2747   llvm::Value *Args[] = {
2748       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2749       Ctor, CopyCtor, Dtor};
2750   CGF.EmitRuntimeCall(
2751       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2752 }
2753 
/// Emit the lazy-initialization support for a 'threadprivate' variable when
/// the target has no usable TLS: build (when needed) a ctor that re-runs the
/// declaration's initializer in the per-thread copy and a dtor for it, then
/// register both with the runtime via __kmpc_threadprivate_register.
///
/// \param VD         The threadprivate variable.
/// \param VDAddr     Address of the original (master) copy of the variable.
/// \param PerformInit Whether the initializer must be (re)emitted.
/// \param CGF        If non-null, registration code is emitted into this
///                   function and nullptr is returned; otherwise a standalone
///                   "__omp_threadprivate_init_" function is created and
///                   returned so it can run as a global initializer.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS support the variable is a regular TLS global and needs no
  // runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit ctor/dtor registration only once per definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor's single parameter is the per-thread copy, passed as void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming void*, retype it to the variable's type, and run
      // the initializer expression into that storage.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are filled with typed null pointers, as the
    // runtime expects all five registration arguments.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: create a standalone initializer that the
      // global-init machinery will call, and return it.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2873 
/// Emit offload-entry ctor/dtor stubs for a 'declare target' global variable
/// so device and host agree on how the variable is initialized/destroyed.
/// Returns true iff compiling for the device, in which case the caller must
/// not emit the regular host-side initializer for the variable.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading targets are configured at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' variables under unified shared memory) are
  // accessed through a reference pointer instead; no entries needed here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Emit the entries only once per definition.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder symbol is needed to pair with the
      // device-side entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder symbol for the destructor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2986 
2987 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2988                                                           QualType VarType,
2989                                                           StringRef Name) {
2990   std::string Suffix = getName({"artificial", ""});
2991   std::string CacheSuffix = getName({"cache", ""});
2992   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2993   llvm::Value *GAddr =
2994       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2995   llvm::Value *Args[] = {
2996       emitUpdateLocation(CGF, SourceLocation()),
2997       getThreadID(CGF, SourceLocation()),
2998       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2999       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3000                                 /*isSigned=*/false),
3001       getOrCreateInternalVariable(
3002           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3003   return Address(
3004       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3005           CGF.EmitRuntimeCall(
3006               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3007           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3008       CGM.getPointerAlign());
3009 }
3010 
3011 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
3012                                       const RegionCodeGenTy &ThenGen,
3013                                       const RegionCodeGenTy &ElseGen) {
3014   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3015 
3016   // If the condition constant folds and can be elided, try to avoid emitting
3017   // the condition and the dead arm of the if/else.
3018   bool CondConstant;
3019   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3020     if (CondConstant)
3021       ThenGen(CGF);
3022     else
3023       ElseGen(CGF);
3024     return;
3025   }
3026 
3027   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3028   // emit the conditional branch.
3029   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3030   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3031   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3032   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3033 
3034   // Emit the 'then' code.
3035   CGF.EmitBlock(ThenBlock);
3036   ThenGen(CGF);
3037   CGF.EmitBranch(ContBlock);
3038   // Emit the 'else' code if present.
3039   // There is no need to emit line number for unconditional branch.
3040   (void)ApplyDebugLocation::CreateEmpty(CGF);
3041   CGF.EmitBlock(ElseBlock);
3042   ElseGen(CGF);
3043   // There is no need to emit line number for unconditional branch.
3044   (void)ApplyDebugLocation::CreateEmpty(CGF);
3045   CGF.EmitBranch(ContBlock);
3046   // Emit the continuation block for code after the if.
3047   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3048 }
3049 
/// Emit the call implementing an OpenMP 'parallel' region: a fork via
/// __kmpc_fork_call, or — when the if-clause evaluates to false — a
/// serialized execution of the outlined function on the current thread
/// bracketed by __kmpc_serialized_parallel/__kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables are appended as the variadic tail of the call.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause both arms are emitted behind a runtime branch;
  // otherwise only the forking path is generated.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3109 
3110 // If we're inside an (outlined) parallel region, use the region info's
3111 // thread-ID variable (it is passed in a first argument of the outlined function
3112 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3113 // regular serial code region, get thread ID by calling kmp_int32
3114 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3115 // return the address of that temp.
3116 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3117                                              SourceLocation Loc) {
3118   if (auto *OMPRegionInfo =
3119           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3120     if (OMPRegionInfo->getThreadIDVariable())
3121       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3122 
3123   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3124   QualType Int32Ty =
3125       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3126   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3127   CGF.EmitStoreOfScalar(ThreadID,
3128                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3129 
3130   return ThreadIDTemp;
3131 }
3132 
3133 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3134     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3135   SmallString<256> Buffer;
3136   llvm::raw_svector_ostream Out(Buffer);
3137   Out << Name;
3138   StringRef RuntimeName = Out.str();
3139   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3140   if (Elem.second) {
3141     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3142            "OMP internal variable has different type than requested");
3143     return &*Elem.second;
3144   }
3145 
3146   return Elem.second = new llvm::GlobalVariable(
3147              CGM.getModule(), Ty, /*IsConstant*/ false,
3148              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3149              Elem.first(), /*InsertBefore=*/nullptr,
3150              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3151 }
3152 
3153 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3154   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3155   std::string Name = getName({Prefix, "var"});
3156   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3157 }
3158 
3159 namespace {
3160 /// Common pre(post)-action for different OpenMP constructs.
3161 class CommonActionTy final : public PrePostActionTy {
3162   llvm::FunctionCallee EnterCallee;
3163   ArrayRef<llvm::Value *> EnterArgs;
3164   llvm::FunctionCallee ExitCallee;
3165   ArrayRef<llvm::Value *> ExitArgs;
3166   bool Conditional;
3167   llvm::BasicBlock *ContBlock = nullptr;
3168 
3169 public:
3170   CommonActionTy(llvm::FunctionCallee EnterCallee,
3171                  ArrayRef<llvm::Value *> EnterArgs,
3172                  llvm::FunctionCallee ExitCallee,
3173                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3174       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3175         ExitArgs(ExitArgs), Conditional(Conditional) {}
3176   void Enter(CodeGenFunction &CGF) override {
3177     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3178     if (Conditional) {
3179       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3180       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3181       ContBlock = CGF.createBasicBlock("omp_if.end");
3182       // Generate the branch (If-stmt)
3183       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3184       CGF.EmitBlock(ThenBlock);
3185     }
3186   }
3187   void Done(CodeGenFunction &CGF) {
3188     // Emit the rest of blocks/branches
3189     CGF.EmitBranch(ContBlock);
3190     CGF.EmitBlock(ContBlock, true);
3191   }
3192   void Exit(CodeGenFunction &CGF) override {
3193     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3194   }
3195 };
3196 } // anonymous namespace
3197 
3198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3199                                          StringRef CriticalName,
3200                                          const RegionCodeGenTy &CriticalOpGen,
3201                                          SourceLocation Loc, const Expr *Hint) {
3202   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3203   // CriticalOpGen();
3204   // __kmpc_end_critical(ident_t *, gtid, Lock);
3205   // Prepare arguments and build a call to __kmpc_critical
3206   if (!CGF.HaveInsertPoint())
3207     return;
3208   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3209                          getCriticalRegionLock(CriticalName)};
3210   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3211                                                 std::end(Args));
3212   if (Hint) {
3213     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3214         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3215   }
3216   CommonActionTy Action(
3217       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3218                                  : OMPRTL__kmpc_critical),
3219       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3220   CriticalOpGen.setAction(Action);
3221   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3222 }
3223 
3224 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3225                                        const RegionCodeGenTy &MasterOpGen,
3226                                        SourceLocation Loc) {
3227   if (!CGF.HaveInsertPoint())
3228     return;
3229   // if(__kmpc_master(ident_t *, gtid)) {
3230   //   MasterOpGen();
3231   //   __kmpc_end_master(ident_t *, gtid);
3232   // }
3233   // Prepare arguments and build a call to __kmpc_master
3234   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3235   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3236                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3237                         /*Conditional=*/true);
3238   MasterOpGen.setAction(Action);
3239   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3240   Action.Done(CGF);
3241 }
3242 
3243 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3244                                         SourceLocation Loc) {
3245   if (!CGF.HaveInsertPoint())
3246     return;
3247   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3248   llvm::Value *Args[] = {
3249       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3250       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3251   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3252   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3253     Region->emitUntiedSwitch(CGF);
3254 }
3255 
3256 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3257                                           const RegionCodeGenTy &TaskgroupOpGen,
3258                                           SourceLocation Loc) {
3259   if (!CGF.HaveInsertPoint())
3260     return;
3261   // __kmpc_taskgroup(ident_t *, gtid);
3262   // TaskgroupOpGen();
3263   // __kmpc_end_taskgroup(ident_t *, gtid);
3264   // Prepare arguments and build a call to __kmpc_taskgroup
3265   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3266   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3267                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3268                         Args);
3269   TaskgroupOpGen.setAction(Action);
3270   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3271 }
3272 
3273 /// Given an array of pointers to variables, project the address of a
3274 /// given variable.
3275 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3276                                       unsigned Index, const VarDecl *Var) {
3277   // Pull out the pointer to the variable.
3278   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3279   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3280 
3281   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3282   Addr = CGF.Builder.CreateElementBitCast(
3283       Addr, CGF.ConvertTypeForMem(Var->getType()));
3284   return Addr;
3285 }
3286 
/// Emit the helper passed to __kmpc_copyprivate that broadcasts copyprivate
/// values:
///   void copy_func(void *LHSArg, void *RHSArg)
/// Both arguments are arrays of void* pointing at the destination and source
/// variables respectively; each pair is copied with its assignment operation.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Perform the copy with the assignment expression prebuilt by Sema.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3340 
/// Emit an OpenMP 'single' region:
///   int32 did_it = 0;                       // only with copyprivate clauses
///   if (__kmpc_single(loc, gtid)) {
///     <region body>
///     did_it = 1;
///     __kmpc_end_single(loc, gtid);
///   }
///   __kmpc_copyprivate(loc, gtid, size, list, copy_func, did_it);
/// The did_it flag records whether this thread executed the region, so the
/// runtime can broadcast the copyprivate variables from that thread.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four clause arrays are parallel: element I of each describes the
  // same copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body only runs when __kmpc_single
  // returns nonzero, and __kmpc_end_single closes the region.
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill slot I with the address of copyprivate variable I, cast to void*.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3421 
3422 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3423                                         const RegionCodeGenTy &OrderedOpGen,
3424                                         SourceLocation Loc, bool IsThreads) {
3425   if (!CGF.HaveInsertPoint())
3426     return;
3427   // __kmpc_ordered(ident_t *, gtid);
3428   // OrderedOpGen();
3429   // __kmpc_end_ordered(ident_t *, gtid);
3430   // Prepare arguments and build a call to __kmpc_ordered
3431   if (IsThreads) {
3432     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3433     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3434                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3435                           Args);
3436     OrderedOpGen.setAction(Action);
3437     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3438     return;
3439   }
3440   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3441 }
3442 
3443 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3444   unsigned Flags;
3445   if (Kind == OMPD_for)
3446     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3447   else if (Kind == OMPD_sections)
3448     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3449   else if (Kind == OMPD_single)
3450     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3451   else if (Kind == OMPD_barrier)
3452     Flags = OMP_IDENT_BARRIER_EXPL;
3453   else
3454     Flags = OMP_IDENT_BARRIER_IMPL;
3455   return Flags;
3456 }
3457 
3458 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3459     CodeGenFunction &CGF, const OMPLoopDirective &S,
3460     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3461   // Check if the loop directive is actually a doacross loop directive. In this
3462   // case choose static, 1 schedule.
3463   if (llvm::any_of(
3464           S.getClausesOfKind<OMPOrderedClause>(),
3465           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3466     ScheduleKind = OMPC_SCHEDULE_static;
3467     // Chunk size is 1 in this case.
3468     llvm::APInt ChunkSize(32, 1);
3469     ChunkExpr = IntegerLiteral::Create(
3470         CGF.getContext(), ChunkSize,
3471         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3472         SourceLocation());
3473   }
3474 }
3475 
/// Emit an explicit or implicit barrier. Inside a cancellable OpenMP region
/// (unless ForceSimpleCall is set) __kmpc_cancel_barrier is emitted instead
/// of __kmpc_barrier, and when EmitChecks is set its nonzero result branches
/// to the construct's cancellation exit.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Only use the cancellable barrier inside a region that can actually be
    // cancelled.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3513 
3514 /// Map the OpenMP loop schedule to the runtime enumeration.
3515 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3516                                           bool Chunked, bool Ordered) {
3517   switch (ScheduleKind) {
3518   case OMPC_SCHEDULE_static:
3519     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3520                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3521   case OMPC_SCHEDULE_dynamic:
3522     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3523   case OMPC_SCHEDULE_guided:
3524     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3525   case OMPC_SCHEDULE_runtime:
3526     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3527   case OMPC_SCHEDULE_auto:
3528     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3529   case OMPC_SCHEDULE_unknown:
3530     assert(!Chunked && "chunk was specified but schedule kind not known");
3531     return Ordered ? OMP_ord_static : OMP_sch_static;
3532   }
3533   llvm_unreachable("Unexpected runtime schedule");
3534 }
3535 
3536 /// Map the OpenMP distribute schedule to the runtime enumeration.
3537 static OpenMPSchedType
3538 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3539   // only static is allowed for dist_schedule
3540   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3541 }
3542 
3543 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3544                                          bool Chunked) const {
3545   OpenMPSchedType Schedule =
3546       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3547   return Schedule == OMP_sch_static;
3548 }
3549 
3550 bool CGOpenMPRuntime::isStaticNonchunked(
3551     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3552   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3553   return Schedule == OMP_dist_sch_static;
3554 }
3555 
3556 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3557                                       bool Chunked) const {
3558   OpenMPSchedType Schedule =
3559       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3560   return Schedule == OMP_sch_static_chunked;
3561 }
3562 
3563 bool CGOpenMPRuntime::isStaticChunked(
3564     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3565   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3566   return Schedule == OMP_dist_sch_static_chunked;
3567 }
3568 
3569 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3570   OpenMPSchedType Schedule =
3571       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3572   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3573   return Schedule != OMP_sch_static;
3574 }
3575 
3576 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3577                                   OpenMPScheduleClauseModifier M1,
3578                                   OpenMPScheduleClauseModifier M2) {
3579   int Modifier = 0;
3580   switch (M1) {
3581   case OMPC_SCHEDULE_MODIFIER_monotonic:
3582     Modifier = OMP_sch_modifier_monotonic;
3583     break;
3584   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3585     Modifier = OMP_sch_modifier_nonmonotonic;
3586     break;
3587   case OMPC_SCHEDULE_MODIFIER_simd:
3588     if (Schedule == OMP_sch_static_chunked)
3589       Schedule = OMP_sch_static_balanced_chunked;
3590     break;
3591   case OMPC_SCHEDULE_MODIFIER_last:
3592   case OMPC_SCHEDULE_MODIFIER_unknown:
3593     break;
3594   }
3595   switch (M2) {
3596   case OMPC_SCHEDULE_MODIFIER_monotonic:
3597     Modifier = OMP_sch_modifier_monotonic;
3598     break;
3599   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3600     Modifier = OMP_sch_modifier_nonmonotonic;
3601     break;
3602   case OMPC_SCHEDULE_MODIFIER_simd:
3603     if (Schedule == OMP_sch_static_chunked)
3604       Schedule = OMP_sch_static_balanced_chunked;
3605     break;
3606   case OMPC_SCHEDULE_MODIFIER_last:
3607   case OMPC_SCHEDULE_MODIFIER_unknown:
3608     break;
3609   }
3610   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3611   // If the static schedule kind is specified or if the ordered clause is
3612   // specified, and if the nonmonotonic modifier is not specified, the effect is
3613   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3614   // modifier is specified, the effect is as if the nonmonotonic modifier is
3615   // specified.
3616   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3617     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3618           Schedule == OMP_sch_static_balanced_chunked ||
3619           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
3620       Modifier = OMP_sch_modifier_nonmonotonic;
3621   }
3622   return Schedule | Modifier;
3623 }
3624 
/// Emit the dispatch-init runtime call for a dynamically scheduled
/// worksharing loop:
///   __kmpc_dispatch_init_<size>(ident_t *loc, kmp_int32 tid,
///       kmp_int32 schedule, kmp_int[32|64] lower, kmp_int[32|64] upper,
///       kmp_int[32|64] stride, kmp_int[32|64] chunk);
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through the static-init path instead, unless the
  // loop is ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3657 
/// Shared helper that emits the actual __kmpc_for_static_init call for both
/// worksharing (loop/sections) and distribute constructs. Schedule must be
/// one of the static runtime schedules; a null Values.Chunk means a
/// non-chunked schedule and a default chunk of 1 is passed.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops use the dispatch path, never the static-init path.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // The IL/LB/UB/ST addresses are passed to the runtime, which writes the
  // per-thread bounds back through them.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3706 
3707 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3708                                         SourceLocation Loc,
3709                                         OpenMPDirectiveKind DKind,
3710                                         const OpenMPScheduleTy &ScheduleKind,
3711                                         const StaticRTInput &Values) {
3712   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3713       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3714   assert(isOpenMPWorksharingDirective(DKind) &&
3715          "Expected loop-based or sections-based directive.");
3716   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3717                                              isOpenMPLoopDirective(DKind)
3718                                                  ? OMP_IDENT_WORK_LOOP
3719                                                  : OMP_IDENT_WORK_SECTIONS);
3720   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3721   llvm::FunctionCallee StaticInitFunction =
3722       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3723   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3724                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3725 }
3726 
3727 void CGOpenMPRuntime::emitDistributeStaticInit(
3728     CodeGenFunction &CGF, SourceLocation Loc,
3729     OpenMPDistScheduleClauseKind SchedKind,
3730     const CGOpenMPRuntime::StaticRTInput &Values) {
3731   OpenMPSchedType ScheduleNum =
3732       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3733   llvm::Value *UpdatedLocation =
3734       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3735   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3736   llvm::FunctionCallee StaticInitFunction =
3737       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3738   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3739                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3740                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3741 }
3742 
3743 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3744                                           SourceLocation Loc,
3745                                           OpenMPDirectiveKind DKind) {
3746   if (!CGF.HaveInsertPoint())
3747     return;
3748   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3749   llvm::Value *Args[] = {
3750       emitUpdateLocation(CGF, Loc,
3751                          isOpenMPDistributeDirective(DKind)
3752                              ? OMP_IDENT_WORK_DISTRIBUTE
3753                              : isOpenMPLoopDirective(DKind)
3754                                    ? OMP_IDENT_WORK_LOOP
3755                                    : OMP_IDENT_WORK_SECTIONS),
3756       getThreadID(CGF, Loc)};
3757   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3758                       Args);
3759 }
3760 
3761 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3762                                                  SourceLocation Loc,
3763                                                  unsigned IVSize,
3764                                                  bool IVSigned) {
3765   if (!CGF.HaveInsertPoint())
3766     return;
3767   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3768   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3769   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3770 }
3771 
3772 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3773                                           SourceLocation Loc, unsigned IVSize,
3774                                           bool IVSigned, Address IL,
3775                                           Address LB, Address UB,
3776                                           Address ST) {
3777   // Call __kmpc_dispatch_next(
3778   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3779   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3780   //          kmp_int[32|64] *p_stride);
3781   llvm::Value *Args[] = {
3782       emitUpdateLocation(CGF, Loc),
3783       getThreadID(CGF, Loc),
3784       IL.getPointer(), // &isLastIter
3785       LB.getPointer(), // &Lower
3786       UB.getPointer(), // &Upper
3787       ST.getPointer()  // &Stride
3788   };
3789   llvm::Value *Call =
3790       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3791   return CGF.EmitScalarConversion(
3792       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3793       CGF.getContext().BoolTy, Loc);
3794 }
3795 
3796 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3797                                            llvm::Value *NumThreads,
3798                                            SourceLocation Loc) {
3799   if (!CGF.HaveInsertPoint())
3800     return;
3801   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3802   llvm::Value *Args[] = {
3803       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3804       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3805   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3806                       Args);
3807 }
3808 
3809 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3810                                          OpenMPProcBindClauseKind ProcBind,
3811                                          SourceLocation Loc) {
3812   if (!CGF.HaveInsertPoint())
3813     return;
3814   // Constants for proc bind value accepted by the runtime.
3815   enum ProcBindTy {
3816     ProcBindFalse = 0,
3817     ProcBindTrue,
3818     ProcBindMaster,
3819     ProcBindClose,
3820     ProcBindSpread,
3821     ProcBindIntel,
3822     ProcBindDefault
3823   } RuntimeProcBind;
3824   switch (ProcBind) {
3825   case OMPC_PROC_BIND_master:
3826     RuntimeProcBind = ProcBindMaster;
3827     break;
3828   case OMPC_PROC_BIND_close:
3829     RuntimeProcBind = ProcBindClose;
3830     break;
3831   case OMPC_PROC_BIND_spread:
3832     RuntimeProcBind = ProcBindSpread;
3833     break;
3834   case OMPC_PROC_BIND_unknown:
3835     llvm_unreachable("Unsupported proc_bind value.");
3836   }
3837   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3838   llvm::Value *Args[] = {
3839       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3840       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3841   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3842 }
3843 
3844 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3845                                 SourceLocation Loc) {
3846   if (!CGF.HaveInsertPoint())
3847     return;
3848   // Build call void __kmpc_flush(ident_t *loc)
3849   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3850                       emitUpdateLocation(CGF, Loc));
3851 }
3852 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order presumably mirrors the field layout of
/// the kmp_task_t record the runtime expects — confirm against the record
/// construction before reordering or inserting enumerators.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3878 
3879 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3880   return OffloadEntriesTargetRegion.empty() &&
3881          OffloadEntriesDeviceGlobalVar.empty();
3882 }
3883 
3884 /// Initialize target region entry.
3885 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3886     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3887                                     StringRef ParentName, unsigned LineNum,
3888                                     unsigned Order) {
3889   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3890                                              "only required for the device "
3891                                              "code generation.");
3892   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3893       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3894                                    OMPTargetRegionEntryTargetRegion);
3895   ++OffloadingEntriesNum;
3896 }
3897 
/// Register a target region entry. On the device, the entry must have been
/// initialized earlier and is completed in place (address, ID, flags);
/// finding no such entry emits a diagnostic. On the host, a new entry is
/// created directly.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host: create the entry directly with the next sequential order number.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3925 
3926 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3927     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3928     unsigned LineNum) const {
3929   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3930   if (PerDevice == OffloadEntriesTargetRegion.end())
3931     return false;
3932   auto PerFile = PerDevice->second.find(FileID);
3933   if (PerFile == PerDevice->second.end())
3934     return false;
3935   auto PerParentName = PerFile->second.find(ParentName);
3936   if (PerParentName == PerFile->second.end())
3937     return false;
3938   auto PerLine = PerParentName->second.find(LineNum);
3939   if (PerLine == PerParentName->second.end())
3940     return false;
3941   // Fail if this entry is already registered.
3942   if (PerLine->second.getAddress() || PerLine->second.getID())
3943     return false;
3944   return true;
3945 }
3946 
3947 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3948     const OffloadTargetRegionEntryInfoActTy &Action) {
3949   // Scan all target region entries and perform the provided action.
3950   for (const auto &D : OffloadEntriesTargetRegion)
3951     for (const auto &F : D.second)
3952       for (const auto &P : F.second)
3953         for (const auto &L : P.second)
3954           Action(D.first, F.first, P.first(), L.first, L.second);
3955 }
3956 
/// Create a placeholder entry (order + flags only) for a device global
/// variable; it is completed later by registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3967 
/// Register a device global variable entry. On the device, the entry was
/// initialized earlier and is completed in place; on the host, it is created
/// on first registration. In both paths, a re-registration of an entry that
/// already has an address only fills in a missing size/linkage and returns.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE: operator[] default-constructs a missing entry; the assert below
    // then requires that the entry was actually initialized beforehand.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only fill in a size/linkage that was still
      // missing (zero size).
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First host-side registration: create a fully populated entry with the
    // next sequential order number.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
4007 
4008 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4009     actOnDeviceGlobalVarEntriesInfo(
4010         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4011   // Scan all target region entries and perform the provided action.
4012   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4013     Action(E.getKey(), E.getValue());
4014 }
4015 
4016 void CGOpenMPRuntime::createOffloadEntry(
4017     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4018     llvm::GlobalValue::LinkageTypes Linkage) {
4019   StringRef Name = Addr->getName();
4020   llvm::Module &M = CGM.getModule();
4021   llvm::LLVMContext &C = M.getContext();
4022 
4023   // Create constant string with the name.
4024   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4025 
4026   std::string StringName = getName({"omp_offloading", "entry_name"});
4027   auto *Str = new llvm::GlobalVariable(
4028       M, StrPtrInit->getType(), /*isConstant=*/true,
4029       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4030   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4031 
4032   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4033                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4034                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4035                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4036                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4037   std::string EntryName = getName({"omp_offloading", "entry", ""});
4038   llvm::GlobalVariable *Entry = createGlobalStruct(
4039       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4040       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4041 
4042   // The entry has to be created in the section the linker expects it to be.
4043   Entry->setSection("omp_offloading_entries");
4044 }
4045 
4046 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4047   // Emit the offloading entries and metadata so that the device codegen side
4048   // can easily figure out what to emit. The produced metadata looks like
4049   // this:
4050   //
4051   // !omp_offload.info = !{!1, ...}
4052   //
4053   // Right now we only generate metadata for function that contain target
4054   // regions.
4055 
4056   // If we are in simd mode or there are no entries, we don't need to do
4057   // anything.
4058   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
4059     return;
4060 
4061   llvm::Module &M = CGM.getModule();
4062   llvm::LLVMContext &C = M.getContext();
4063   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
4064                          SourceLocation, StringRef>,
4065               16>
4066       OrderedEntries(OffloadEntriesInfoManager.size());
4067   llvm::SmallVector<StringRef, 16> ParentFunctions(
4068       OffloadEntriesInfoManager.size());
4069 
4070   // Auxiliary methods to create metadata values and strings.
4071   auto &&GetMDInt = [this](unsigned V) {
4072     return llvm::ConstantAsMetadata::get(
4073         llvm::ConstantInt::get(CGM.Int32Ty, V));
4074   };
4075 
4076   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4077 
4078   // Create the offloading info metadata node.
4079   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4080 
4081   // Create function that emits metadata for each target region entry;
4082   auto &&TargetRegionMetadataEmitter =
4083       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
4084        &GetMDString](
4085           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4086           unsigned Line,
4087           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4088         // Generate metadata for target regions. Each entry of this metadata
4089         // contains:
4090         // - Entry 0 -> Kind of this type of metadata (0).
4091         // - Entry 1 -> Device ID of the file where the entry was identified.
4092         // - Entry 2 -> File ID of the file where the entry was identified.
4093         // - Entry 3 -> Mangled name of the function where the entry was
4094         // identified.
4095         // - Entry 4 -> Line in the file where the entry was identified.
4096         // - Entry 5 -> Order the entry was created.
4097         // The first element of the metadata node is the kind.
4098         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4099                                  GetMDInt(FileID),      GetMDString(ParentName),
4100                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4101 
4102         SourceLocation Loc;
4103         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4104                   E = CGM.getContext().getSourceManager().fileinfo_end();
4105              I != E; ++I) {
4106           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4107               I->getFirst()->getUniqueID().getFile() == FileID) {
4108             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
4109                 I->getFirst(), Line, 1);
4110             break;
4111           }
4112         }
4113         // Save this entry in the right position of the ordered entries array.
4114         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4115         ParentFunctions[E.getOrder()] = ParentName;
4116 
4117         // Add metadata to the named metadata node.
4118         MD->addOperand(llvm::MDNode::get(C, Ops));
4119       };
4120 
4121   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4122       TargetRegionMetadataEmitter);
4123 
4124   // Create function that emits metadata for each device global variable entry;
4125   auto &&DeviceGlobalVarMetadataEmitter =
4126       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4127        MD](StringRef MangledName,
4128            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4129                &E) {
4130         // Generate metadata for global variables. Each entry of this metadata
4131         // contains:
4132         // - Entry 0 -> Kind of this type of metadata (1).
4133         // - Entry 1 -> Mangled name of the variable.
4134         // - Entry 2 -> Declare target kind.
4135         // - Entry 3 -> Order the entry was created.
4136         // The first element of the metadata node is the kind.
4137         llvm::Metadata *Ops[] = {
4138             GetMDInt(E.getKind()), GetMDString(MangledName),
4139             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4140 
4141         // Save this entry in the right position of the ordered entries array.
4142         OrderedEntries[E.getOrder()] =
4143             std::make_tuple(&E, SourceLocation(), MangledName);
4144 
4145         // Add metadata to the named metadata node.
4146         MD->addOperand(llvm::MDNode::get(C, Ops));
4147       };
4148 
4149   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4150       DeviceGlobalVarMetadataEmitter);
4151 
4152   for (const auto &E : OrderedEntries) {
4153     assert(std::get<0>(E) && "All ordered entries must exist!");
4154     if (const auto *CE =
4155             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4156                 std::get<0>(E))) {
4157       if (!CE->getID() || !CE->getAddress()) {
4158         // Do not blame the entry if the parent funtion is not emitted.
4159         StringRef FnName = ParentFunctions[CE->getOrder()];
4160         if (!CGM.GetGlobalValue(FnName))
4161           continue;
4162         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4163             DiagnosticsEngine::Error,
4164             "Offloading entry for target region in %0 is incorrect: either the "
4165             "address or the ID is invalid.");
4166         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4167         continue;
4168       }
4169       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4170                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4171     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4172                                              OffloadEntryInfoDeviceGlobalVar>(
4173                    std::get<0>(E))) {
4174       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4175           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4176               CE->getFlags());
4177       switch (Flags) {
4178       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4179         if (CGM.getLangOpts().OpenMPIsDevice &&
4180             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4181           continue;
4182         if (!CE->getAddress()) {
4183           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4184               DiagnosticsEngine::Error, "Offloading entry for declare target "
4185                                         "variable %0 is incorrect: the "
4186                                         "address is invalid.");
4187           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4188           continue;
4189         }
4190         // The vaiable has no definition - no need to add the entry.
4191         if (CE->getVarSize().isZero())
4192           continue;
4193         break;
4194       }
4195       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4196         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4197                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4198                "Declaret target link address is set.");
4199         if (CGM.getLangOpts().OpenMPIsDevice)
4200           continue;
4201         if (!CE->getAddress()) {
4202           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4203               DiagnosticsEngine::Error,
4204               "Offloading entry for declare target variable is incorrect: the "
4205               "address is invalid.");
4206           CGM.getDiags().Report(DiagID);
4207           continue;
4208         }
4209         break;
4210       }
4211       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4212                          CE->getVarSize().getQuantity(), Flags,
4213                          CE->getLinkage());
4214     } else {
4215       llvm_unreachable("Unsupported entry kind.");
4216     }
4217   }
4218 }
4219 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device compilation consumes host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local context; only its named metadata is used.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read back the operands in the format written by
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the layout of the remaining operands
    // depends on it (see the metadata emitters above).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4288 
4289 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4290   if (!KmpRoutineEntryPtrTy) {
4291     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4292     ASTContext &C = CGM.getContext();
4293     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4294     FunctionProtoType::ExtProtoInfo EPI;
4295     KmpRoutineEntryPtrQTy = C.getPointerType(
4296         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4297     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4298   }
4299 }
4300 
4301 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4302   // Make sure the type of the entry is already created. This is the type we
4303   // have to create:
4304   // struct __tgt_offload_entry{
4305   //   void      *addr;       // Pointer to the offload entry info.
4306   //                          // (function or global)
4307   //   char      *name;       // Name of the function or global.
4308   //   size_t     size;       // Size of the entry info (0 if it a function).
4309   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4310   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4311   // };
4312   if (TgtOffloadEntryQTy.isNull()) {
4313     ASTContext &C = CGM.getContext();
4314     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4315     RD->startDefinition();
4316     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4317     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4318     addFieldToRecordDecl(C, RD, C.getSizeType());
4319     addFieldToRecordDecl(
4320         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4321     addFieldToRecordDecl(
4322         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4323     RD->completeDefinition();
4324     RD->addAttr(PackedAttr::CreateImplicit(C));
4325     TgtOffloadEntryQTy = C.getRecordType(RD);
4326   }
4327   return TgtOffloadEntryQTy;
4328 }
4329 
4330 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4331   // These are the types we need to build:
4332   // struct __tgt_device_image{
4333   // void   *ImageStart;       // Pointer to the target code start.
4334   // void   *ImageEnd;         // Pointer to the target code end.
4335   // // We also add the host entries to the device image, as it may be useful
4336   // // for the target runtime to have access to that information.
4337   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4338   //                                       // the entries.
4339   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4340   //                                       // entries (non inclusive).
4341   // };
4342   if (TgtDeviceImageQTy.isNull()) {
4343     ASTContext &C = CGM.getContext();
4344     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4345     RD->startDefinition();
4346     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4347     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4348     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4349     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4350     RD->completeDefinition();
4351     TgtDeviceImageQTy = C.getRecordType(RD);
4352   }
4353   return TgtDeviceImageQTy;
4354 }
4355 
4356 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4357   // struct __tgt_bin_desc{
4358   //   int32_t              NumDevices;      // Number of devices supported.
4359   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4360   //                                         // (one per device).
4361   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4362   //                                         // entries.
4363   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4364   //                                         // entries (non inclusive).
4365   // };
4366   if (TgtBinaryDescriptorQTy.isNull()) {
4367     ASTContext &C = CGM.getContext();
4368     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4369     RD->startDefinition();
4370     addFieldToRecordDecl(
4371         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4372     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4373     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4374     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4375     RD->completeDefinition();
4376     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4377   }
4378   return TgtBinaryDescriptorQTy;
4379 }
4380 
4381 namespace {
4382 struct PrivateHelpersTy {
4383   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4384                    const VarDecl *PrivateElemInit)
4385       : Original(Original), PrivateCopy(PrivateCopy),
4386         PrivateElemInit(PrivateElemInit) {}
4387   const VarDecl *Original;
4388   const VarDecl *PrivateCopy;
4389   const VarDecl *PrivateElemInit;
4390 };
4391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4392 } // anonymous namespace
4393 
4394 static RecordDecl *
4395 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4396   if (!Privates.empty()) {
4397     ASTContext &C = CGM.getContext();
4398     // Build struct .kmp_privates_t. {
4399     //         /*  private vars  */
4400     //       };
4401     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4402     RD->startDefinition();
4403     for (const auto &Pair : Privates) {
4404       const VarDecl *VD = Pair.second.Original;
4405       QualType Type = VD->getType().getNonReferenceType();
4406       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4407       if (VD->hasAttrs()) {
4408         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4409              E(VD->getAttrs().end());
4410              I != E; ++I)
4411           FD->addAttr(*I);
4412       }
4413     }
4414     RD->completeDefinition();
4415     return RD;
4416   }
4417   return nullptr;
4418 }
4419 
4420 static RecordDecl *
4421 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4422                          QualType KmpInt32Ty,
4423                          QualType KmpRoutineEntryPointerQTy) {
4424   ASTContext &C = CGM.getContext();
4425   // Build struct kmp_task_t {
4426   //         void *              shareds;
4427   //         kmp_routine_entry_t routine;
4428   //         kmp_int32           part_id;
4429   //         kmp_cmplrdata_t data1;
4430   //         kmp_cmplrdata_t data2;
4431   // For taskloops additional fields:
4432   //         kmp_uint64          lb;
4433   //         kmp_uint64          ub;
4434   //         kmp_int64           st;
4435   //         kmp_int32           liter;
4436   //         void *              reductions;
4437   //       };
4438   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4439   UD->startDefinition();
4440   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4441   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4442   UD->completeDefinition();
4443   QualType KmpCmplrdataTy = C.getRecordType(UD);
4444   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4445   RD->startDefinition();
4446   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4447   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4448   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4449   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4450   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4451   if (isOpenMPTaskLoopDirective(Kind)) {
4452     QualType KmpUInt64Ty =
4453         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4454     QualType KmpInt64Ty =
4455         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4456     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4457     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4458     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4459     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4460     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4461   }
4462   RD->completeDefinition();
4463   return RD;
4464 }
4465 
4466 static RecordDecl *
4467 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4468                                      ArrayRef<PrivateDataTy> Privates) {
4469   ASTContext &C = CGM.getContext();
4470   // Build struct kmp_task_t_with_privates {
4471   //         kmp_task_t task_data;
4472   //         .kmp_privates_t. privates;
4473   //       };
4474   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4475   RD->startDefinition();
4476   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4477   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4478     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4479   RD->completeDefinition();
4480   return RD;
4481 }
4482 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param TaskFunction Outlined task body the proxy forwards to.
/// \param TaskPrivatesMap Value passed through to the task function as the
/// privates mapping argument.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy has the fixed signature (kmp_int32 gtid, kmp_task_t *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task descriptor argument; Base is its kmp_task_t part.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the task body can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the expected record type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass a pointer to the privates part, or null when the record has no
  // privates field (i.e. the task privatizes nothing).
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and the reductions
    // pointer, all loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4597 
/// Emit a function that destroys the destructible fields of the task's
/// privates record:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // run the destructor of every destructible field of tt->privates
/// }
/// \endcode
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same fixed signature as the task entry: (kmp_int32 gtid, kmp_task_t *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates field (second field of kmp_task_t_with_privates)
  // and schedule a destroy for every field with a destructible type.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4646 
4647 /// Emit a privates mapping function for correct handling of private and
4648 /// firstprivate variables.
4649 /// \code
4650 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4651 /// **noalias priv1,...,  <tyn> **noalias privn) {
4652 ///   *priv1 = &.privates.priv1;
4653 ///   ...;
4654 ///   *privn = &.privates.privn;
4655 /// }
4656 /// \endcode
4657 static llvm::Value *
4658 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4659                                ArrayRef<const Expr *> PrivateVars,
4660                                ArrayRef<const Expr *> FirstprivateVars,
4661                                ArrayRef<const Expr *> LastprivateVars,
4662                                QualType PrivatesQTy,
4663                                ArrayRef<PrivateDataTy> Privates) {
4664   ASTContext &C = CGM.getContext();
4665   FunctionArgList Args;
4666   ImplicitParamDecl TaskPrivatesArg(
4667       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4668       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4669       ImplicitParamDecl::Other);
4670   Args.push_back(&TaskPrivatesArg);
4671   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4672   unsigned Counter = 1;
4673   for (const Expr *E : PrivateVars) {
4674     Args.push_back(ImplicitParamDecl::Create(
4675         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676         C.getPointerType(C.getPointerType(E->getType()))
4677             .withConst()
4678             .withRestrict(),
4679         ImplicitParamDecl::Other));
4680     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681     PrivateVarsPos[VD] = Counter;
4682     ++Counter;
4683   }
4684   for (const Expr *E : FirstprivateVars) {
4685     Args.push_back(ImplicitParamDecl::Create(
4686         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4687         C.getPointerType(C.getPointerType(E->getType()))
4688             .withConst()
4689             .withRestrict(),
4690         ImplicitParamDecl::Other));
4691     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4692     PrivateVarsPos[VD] = Counter;
4693     ++Counter;
4694   }
4695   for (const Expr *E : LastprivateVars) {
4696     Args.push_back(ImplicitParamDecl::Create(
4697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698         C.getPointerType(C.getPointerType(E->getType()))
4699             .withConst()
4700             .withRestrict(),
4701         ImplicitParamDecl::Other));
4702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703     PrivateVarsPos[VD] = Counter;
4704     ++Counter;
4705   }
4706   const auto &TaskPrivatesMapFnInfo =
4707       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4708   llvm::FunctionType *TaskPrivatesMapTy =
4709       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4710   std::string Name =
4711       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4712   auto *TaskPrivatesMap = llvm::Function::Create(
4713       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4714       &CGM.getModule());
4715   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4716                                     TaskPrivatesMapFnInfo);
4717   if (CGM.getLangOpts().Optimize) {
4718     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4719     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4720     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4721   }
4722   CodeGenFunction CGF(CGM);
4723   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4724                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4725 
4726   // *privi = &.privates.privi;
4727   LValue Base = CGF.EmitLoadOfPointerLValue(
4728       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4729       TaskPrivatesArg.getType()->castAs<PointerType>());
4730   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4731   Counter = 0;
4732   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4733     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4734     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4735     LValue RefLVal =
4736         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4737     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4738         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4739     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4740     ++Counter;
4741   }
4742   CGF.FinishFunction();
4743   return TaskPrivatesMap;
4744 }
4745 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block (may be
/// invalid when there is nothing to read the initial values from).
/// \param TDBase LValue of the kmp_task_t_with_privates object being filled.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the
/// .privates. record this function initializes field by field.
/// \param Data Clause data; only used here to detect whether firstprivates
/// are present.
/// \param Privates Helper data per private copy, in .privates. field order.
/// \param ForDup True when called from the task-dup function: in that mode
/// only non-trivial constructor initializations are (re-)emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the .privates. record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  // CapturesInfo resolves original variables to their captured fields in the
  // shareds record.
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Re-point FI at the first field of the .privates. record; it is advanced
  // in lockstep with Privates below.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task-dup function (ForDup) only non-trivial constructor calls
    // need to be re-run; trivial initializations were already copied.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit is only set for firstprivates: the copy must be
      // initialized from the original (shared) value.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Regular capture: read the original value out of the shareds
          // record, re-wrapped with the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize Elem to point at the shared
          // value, then run the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: run its own initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4850 
4851 /// Check if duplication function is required for taskloops.
4852 static bool checkInitIsRequired(CodeGenFunction &CGF,
4853                                 ArrayRef<PrivateDataTy> Privates) {
4854   bool InitRequired = false;
4855   for (const PrivateDataTy &Pair : Privates) {
4856     const VarDecl *VD = Pair.second.PrivateCopy;
4857     const Expr *Init = VD->getAnyInitializer();
4858     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4859                                     !CGF.isTrivialInitializer(Init));
4860     if (InitRequired)
4861       break;
4862   }
4863   return InitRequired;
4864 }
4865 
4866 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
///
/// \param WithLastIter If true, the generated function also copies the
/// 'lastpriv' argument into the destination task's last-iteration field.
/// \return The generated internal-linkage task-dup function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void .omp_task_dup.(kmp_task_t_with_privates *dst,
  //                                kmp_task_t_with_privates *src, int last).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates need the source task's shareds pointer to read the
  // original values from; without firstprivates it stays invalid.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // ForDup=true: only re-run non-trivial constructor initializations.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4945 
4946 /// Checks if destructor function is required to be generated.
4947 /// \return true if cleanups are required, false otherwise.
4948 static bool
4949 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4950   bool NeedsCleanup = false;
4951   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4952   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4953   for (const FieldDecl *FD : PrivateRD->fields()) {
4954     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4955     if (NeedsCleanup)
4956       break;
4957   }
4958   return NeedsCleanup;
4959 }
4960 
/// Allocates and initializes a new task object for a task-based directive:
/// builds the kmp_task_t_with_privates type, emits the proxy task entry and
/// helper functions (privates map, destructors, task-dup), calls
/// __kmpc_omp_task_alloc (or the target variant) and fills in shareds,
/// private copies, destructor pointer and priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the declared alignment with {original, private copy,
  // elem-init}; elem-init is only set for firstprivates.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment; stable so equally-aligned variables keep
  // their relative order. The .privates. record fields follow this order.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives use an extended record, cached separately from the
  // one used by task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates-map pointer is the 4th parameter of TaskFunction; a null
  // pointer is passed when there are no privates at all.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // 'final' may be a runtime expression (pointer set) or a compile-time
  // constant (int flag) - select accordingly.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Cast the runtime's kmp_task_t* result to the task-specific
  // kmp_task_t_with_privates*.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-dup function when lastprivates are
    // present or some private copy requires non-trivial initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5180 
5181 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5182                                    const OMPExecutableDirective &D,
5183                                    llvm::Function *TaskFunction,
5184                                    QualType SharedsTy, Address Shareds,
5185                                    const Expr *IfCond,
5186                                    const OMPTaskDataTy &Data) {
5187   if (!CGF.HaveInsertPoint())
5188     return;
5189 
5190   TaskResultTy Result =
5191       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5192   llvm::Value *NewTask = Result.NewTask;
5193   llvm::Function *TaskEntry = Result.TaskEntry;
5194   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5195   LValue TDBase = Result.TDBase;
5196   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5197   ASTContext &C = CGM.getContext();
5198   // Process list of dependences.
5199   Address DependenciesArray = Address::invalid();
5200   unsigned NumDependencies = Data.Dependences.size();
5201   if (NumDependencies) {
5202     // Dependence kind for RTL.
5203     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5204     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5205     RecordDecl *KmpDependInfoRD;
5206     QualType FlagsTy =
5207         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5208     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5209     if (KmpDependInfoTy.isNull()) {
5210       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5211       KmpDependInfoRD->startDefinition();
5212       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5213       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5214       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5215       KmpDependInfoRD->completeDefinition();
5216       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5217     } else {
5218       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5219     }
5220     // Define type kmp_depend_info[<Dependences.size()>];
5221     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5222         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5223         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5224     // kmp_depend_info[<Dependences.size()>] deps;
5225     DependenciesArray =
5226         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5227     for (unsigned I = 0; I < NumDependencies; ++I) {
5228       const Expr *E = Data.Dependences[I].second;
5229       LValue Addr = CGF.EmitLValue(E);
5230       llvm::Value *Size;
5231       QualType Ty = E->getType();
5232       if (const auto *ASE =
5233               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5234         LValue UpAddrLVal =
5235             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5236         llvm::Value *UpAddr =
5237             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5238         llvm::Value *LowIntPtr =
5239             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5240         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5241         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5242       } else {
5243         Size = CGF.getTypeSize(Ty);
5244       }
5245       LValue Base = CGF.MakeAddrLValue(
5246           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5247           KmpDependInfoTy);
5248       // deps[i].base_addr = &<Dependences[i].second>;
5249       LValue BaseAddrLVal = CGF.EmitLValueForField(
5250           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5251       CGF.EmitStoreOfScalar(
5252           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5253           BaseAddrLVal);
5254       // deps[i].len = sizeof(<Dependences[i].second>);
5255       LValue LenLVal = CGF.EmitLValueForField(
5256           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5257       CGF.EmitStoreOfScalar(Size, LenLVal);
5258       // deps[i].flags = <Dependences[i].first>;
5259       RTLDependenceKindTy DepKind;
5260       switch (Data.Dependences[I].first) {
5261       case OMPC_DEPEND_in:
5262         DepKind = DepIn;
5263         break;
5264       // Out and InOut dependencies must use the same code.
5265       case OMPC_DEPEND_out:
5266       case OMPC_DEPEND_inout:
5267         DepKind = DepInOut;
5268         break;
5269       case OMPC_DEPEND_mutexinoutset:
5270         DepKind = DepMutexInOutSet;
5271         break;
5272       case OMPC_DEPEND_source:
5273       case OMPC_DEPEND_sink:
5274       case OMPC_DEPEND_unknown:
5275         llvm_unreachable("Unknown task dependence type");
5276       }
5277       LValue FlagsLVal = CGF.EmitLValueForField(
5278           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5279       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5280                             FlagsLVal);
5281     }
5282     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5283         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5284   }
5285 
5286   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5287   // libcall.
5288   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5289   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5290   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5291   // list is not empty
5292   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5293   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5294   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5295   llvm::Value *DepTaskArgs[7];
5296   if (NumDependencies) {
5297     DepTaskArgs[0] = UpLoc;
5298     DepTaskArgs[1] = ThreadID;
5299     DepTaskArgs[2] = NewTask;
5300     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5301     DepTaskArgs[4] = DependenciesArray.getPointer();
5302     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5303     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5304   }
5305   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5306                         &TaskArgs,
5307                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5308     if (!Data.Tied) {
5309       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5310       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5311       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5312     }
5313     if (NumDependencies) {
5314       CGF.EmitRuntimeCall(
5315           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5316     } else {
5317       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5318                           TaskArgs);
5319     }
5320     // Check if parent region is untied and build return for untied task;
5321     if (auto *Region =
5322             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5323       Region->emitUntiedSwitch(CGF);
5324   };
5325 
5326   llvm::Value *DepWaitTaskArgs[6];
5327   if (NumDependencies) {
5328     DepWaitTaskArgs[0] = UpLoc;
5329     DepWaitTaskArgs[1] = ThreadID;
5330     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5331     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5332     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5333     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5334   }
5335   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5336                         NumDependencies, &DepWaitTaskArgs,
5337                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5338     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5339     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5340     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5341     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5342     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5343     // is specified.
5344     if (NumDependencies)
5345       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5346                           DepWaitTaskArgs);
5347     // Call proxy_task_entry(gtid, new_task);
5348     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5349                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5350       Action.Enter(CGF);
5351       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5352       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5353                                                           OutlinedFnArgs);
5354     };
5355 
5356     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5357     // kmp_task_t *new_task);
5358     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5359     // kmp_task_t *new_task);
5360     RegionCodeGenTy RCG(CodeGen);
5361     CommonActionTy Action(
5362         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5363         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5364     RCG.setAction(Action);
5365     RCG(CGF);
5366   };
5367 
5368   if (IfCond) {
5369     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5370   } else {
5371     RegionCodeGenTy ThenRCG(ThenCodeGen);
5372     ThenRCG(CGF);
5373   }
5374 }
5375 
/// Emits the runtime call for an OpenMP 'taskloop' directive: allocates and
/// initializes the task object via emitTaskInit(), fills the task's
/// lower-bound/upper-bound/stride fields from the loop directive, stores the
/// reductions pointer, and finally emits the __kmpc_taskloop libcall.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Nothing to emit if the current block is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the evaluated 'if' clause condition widened to int, or constant 1
  // when no 'if' clause is present.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lb/ub/st fields from the loop directive's helper
  // variables (their initializers hold the computed bounds).
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: null out the field so the runtime sees no reduction data.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: pick NumTasks vs Grainsize from the PointerIntPair's int bit;
      // NoSchedule when neither clause was given.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup: duplication routine for lastprivates/firstprivates, or null.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5456 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on every iteration (used by the atomic-reduction generator).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers; the
  // back-edge incoming values are added after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation works on single elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5536 
5537 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5538 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5539 /// UDR combiner function.
5540 static void emitReductionCombiner(CodeGenFunction &CGF,
5541                                   const Expr *ReductionOp) {
5542   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5543     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5544       if (const auto *DRE =
5545               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5546         if (const auto *DRD =
5547                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5548           std::pair<llvm::Function *, llvm::Function *> Reduction =
5549               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5550           RValue Func = RValue::get(Reduction.first);
5551           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5552           CGF.EmitIgnoredExpr(ReductionOp);
5553           return;
5554         }
5555   CGF.EmitIgnoredExpr(ReductionOp);
5556 }
5557 
/// Emits the outlined reduce function passed to __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* slots referencing the reduction
/// variables (with extra slots holding VLA sizes); element \p I is combined
/// using ReductionOps[I]. Returns the newly created internal function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the argument
  // arrays so the reduction expressions address the right storage. Idx tracks
  // the slot index, which runs ahead of I whenever a VLA size slot is present.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stashed as a fake pointer in the next slot; convert it
      // back to an integer and bind it to the VLA's size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for every reduction variable under the remapping.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5649 
5650 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5651                                                   const Expr *ReductionOp,
5652                                                   const Expr *PrivateRef,
5653                                                   const DeclRefExpr *LHS,
5654                                                   const DeclRefExpr *RHS) {
5655   if (PrivateRef->getType()->isArrayType()) {
5656     // Emit reduction for array section.
5657     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5658     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5659     EmitOMPAggregateReduction(
5660         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5661         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5662           emitReductionCombiner(CGF, ReductionOp);
5663         });
5664   } else {
5665     // Emit reduction for array subscript or single variable.
5666     emitReductionCombiner(CGF, ReductionOp);
5667   }
5668 }
5669 
/// Emits codegen for an OpenMP 'reduction' clause. With SimpleReduction set,
/// the combiners are emitted inline; otherwise the full
/// __kmpc_reduce{_nowait} protocol is emitted with a switch over its result:
/// case 1 performs the tree reduction, case 2 the atomic fallback.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline form: emit each combiner directly, no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size is smuggled through the void* slot via inttoptr; the reduce
      // function converts it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // CommonActionTy emits the end-reduce call after the combiners.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // For an 'x = <update>' combiner, split it into the atomic target
      // (XExpr) and the update expression (UpExpr).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Non-lockfree fallback: stage the loaded value of X in a
                // temporary bound to VD and re-evaluate the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5973 
5974 /// Generates unique name for artificial threadprivate variables.
5975 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5976 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5977                                       const Expr *Ref) {
5978   SmallString<256> Buffer;
5979   llvm::raw_svector_ostream Out(Buffer);
5980   const clang::DeclRefExpr *DE;
5981   const VarDecl *D = ::getBaseDecl(Ref, DE);
5982   if (!D)
5983     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5984   D = D->getCanonicalDecl();
5985   std::string Name = CGM.getOpenMPRuntime().getName(
5986       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5987   Out << Prefix << Name << "_"
5988       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5989   return Out.str();
5990 }
5991 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The function takes a single void* argument: the private copy of the
  // reduction item to initialize.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Dereference the void* argument to get the address of the private copy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer); it is reloaded from the artificial threadprivate
  // variable stored by emitTaskReductionFixups.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass a null shared address.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6058 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // Variables referenced by the combiner expression; they are remapped to the
  // two function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // The function takes two void* arguments: the in/out item and the in item.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6136 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Skip the function entirely for items without destructors/cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // The function takes a single void* argument: the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Dereference the void* argument to get the address of the private copy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6185 
/// Emits the runtime setup for task reductions: builds an on-stack array of
/// kmp_task_red_input_t descriptors (one per reduction item, each carrying
/// the shared address, size, and the init/fini/comb helper functions) and
/// passes it to __kmpc_task_reduction_init, returning the value produced by
/// that runtime call (nullptr if there is nothing to do).
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when no cleanups are needed).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (1 when creation/initialization is delayed).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6290 
/// Stores per-item values (dynamic size, original shared address) into the
/// artificial threadprivate variables that the out-of-line reduction
/// init/comb/fini helper functions reload via generateUniqueName.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr): the dynamically computed size must be made
  // available to the helper functions.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6317 
6318 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6319                                               SourceLocation Loc,
6320                                               llvm::Value *ReductionsPtr,
6321                                               LValue SharedLVal) {
6322   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6323   // *d);
6324   llvm::Value *Args[] = {
6325       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6326                                 /*isSigned=*/true),
6327       ReductionsPtr,
6328       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6329                                                       CGM.VoidPtrTy)};
6330   return Address(
6331       CGF.EmitRuntimeCall(
6332           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6333       SharedLVal.getAlignment());
6334 }
6335 
6336 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6337                                        SourceLocation Loc) {
6338   if (!CGF.HaveInsertPoint())
6339     return;
6340   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6341   // global_tid);
6342   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6343   // Ignore return result until untied tasks are supported.
6344   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6345   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6346     Region->emitUntiedSwitch(CGF);
6347 }
6348 
6349 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6350                                            OpenMPDirectiveKind InnerKind,
6351                                            const RegionCodeGenTy &CodeGen,
6352                                            bool HasCancel) {
6353   if (!CGF.HaveInsertPoint())
6354     return;
6355   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6356   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6357 }
6358 
namespace {
/// Cancellation kinds passed as the kmp_int32 cncl_kind argument to the
/// __kmpc_cancel/__kmpc_cancellationpoint runtime calls below.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a 'parallel' region.
  CancelLoop = 2,      // Cancel a worksharing loop ('for').
  CancelSections = 3,  // Cancel a 'sections' region.
  CancelTaskgroup = 4  // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6368 
6369 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6370   RTCancelKind CancelKind = CancelNoreq;
6371   if (CancelRegion == OMPD_parallel)
6372     CancelKind = CancelParallel;
6373   else if (CancelRegion == OMPD_for)
6374     CancelKind = CancelLoop;
6375   else if (CancelRegion == OMPD_sections)
6376     CancelKind = CancelSections;
6377   else {
6378     assert(CancelRegion == OMPD_taskgroup);
6379     CancelKind = CancelTaskgroup;
6380   }
6381   return CancelKind;
6382 }
6383 
/// Emits code for the 'cancellation point' directive: calls
/// __kmpc_cancellationpoint and, if it reports that cancellation was
/// activated, branches out of the innermost cancellable construct.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6418 
/// Emits code for the 'cancel' directive: calls __kmpc_cancel (guarded by the
/// 'if' clause condition when present) and, if the runtime reports that
/// cancellation was activated, branches out of the cancelled construct.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code generated when the cancel is actually taken ('if' clause true or
    // absent).
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition; the else branch is a
      // no-op.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6460 
/// Emits the outlined function and region ID for a 'target' directive,
/// recording that this module emitted at least one target region.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Remember that a target region was seen; set before delegating so the flag
  // is accurate even for non-offload-entry regions.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6470 
/// Generates the outlined function for a target region, computes its unique
/// entry name, creates the region ID (the function itself on device, an
/// artificial global on host), and registers the offload entry.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function named EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host: the ID is an artificial weak constant global named
    // <EntryFnName>.region_id.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6537 
6538 /// Checks if the expression is constant or does not have non-trivial function
6539 /// calls.
6540 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6541   // We can skip constant expressions.
6542   // We can skip expressions with trivial calls or simple expressions.
6543   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6544           !E->hasNonTrivialCall(Ctx)) &&
6545          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6546 }
6547 
/// Walks \p Body, descending through nested compound statements, and returns
/// the single meaningful child statement if there is exactly one; returns
/// nullptr when several meaningful children are found. Trivial expressions,
/// asm/null statements, certain stand-alone OpenMP directives, and trivial
/// declarations are ignored during the scan.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Skip constant or side-effect-free expressions.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declaration in it is trivial.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single child to unwrap further compound statements.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6592 
6593 /// Emit the number of teams for a target directive.  Inspect the num_teams
6594 /// clause associated with a teams construct combined or closely nested
6595 /// with the target directive.
6596 ///
6597 /// Emit a team of size one for directives such as 'target parallel' that
6598 /// have no associated teams construct.
6599 ///
6600 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' region: look at the single child statement to see
    // whether a teams/parallel/simd directive is nested directly inside.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the num_teams expression of the nested teams directive
          // in the captured context of the enclosing 'target' region.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without a num_teams clause: 0 means "let the runtime
        // choose".
        return Bld.getInt32(0);
      }
      // A nested parallel or simd region executes with a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified; the caller decides.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams constructs: the num_teams clause, if any, is
    // attached to the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    // No num_teams clause: let the runtime pick a default.
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These target constructs have no associated teams construct and execute
    // with exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based executable directives
  // and are rejected by the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6717 
6718 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6719                                   llvm::Value *DefaultThreadLimitVal) {
6720   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6721       CGF.getContext(), CS->getCapturedStmt());
6722   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6723     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6724       llvm::Value *NumThreads = nullptr;
6725       llvm::Value *CondVal = nullptr;
6726       // Handle if clause. If if clause present, the number of threads is
6727       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6728       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6729         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6730         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6731         const OMPIfClause *IfClause = nullptr;
6732         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6733           if (C->getNameModifier() == OMPD_unknown ||
6734               C->getNameModifier() == OMPD_parallel) {
6735             IfClause = C;
6736             break;
6737           }
6738         }
6739         if (IfClause) {
6740           const Expr *Cond = IfClause->getCondition();
6741           bool Result;
6742           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6743             if (!Result)
6744               return CGF.Builder.getInt32(1);
6745           } else {
6746             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6747             if (const auto *PreInit =
6748                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6749               for (const auto *I : PreInit->decls()) {
6750                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6751                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6752                 } else {
6753                   CodeGenFunction::AutoVarEmission Emission =
6754                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6755                   CGF.EmitAutoVarCleanups(Emission);
6756                 }
6757               }
6758             }
6759             CondVal = CGF.EvaluateExprAsBool(Cond);
6760           }
6761         }
6762       }
6763       // Check the value of num_threads clause iff if clause was not specified
6764       // or is not evaluated to false.
6765       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6766         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6767         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6768         const auto *NumThreadsClause =
6769             Dir->getSingleClause<OMPNumThreadsClause>();
6770         CodeGenFunction::LexicalScope Scope(
6771             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6772         if (const auto *PreInit =
6773                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6774           for (const auto *I : PreInit->decls()) {
6775             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6776               CGF.EmitVarDecl(cast<VarDecl>(*I));
6777             } else {
6778               CodeGenFunction::AutoVarEmission Emission =
6779                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6780               CGF.EmitAutoVarCleanups(Emission);
6781             }
6782           }
6783         }
6784         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6785         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6786                                                /*isSigned=*/false);
6787         if (DefaultThreadLimitVal)
6788           NumThreads = CGF.Builder.CreateSelect(
6789               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6790               DefaultThreadLimitVal, NumThreads);
6791       } else {
6792         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6793                                            : CGF.Builder.getInt32(0);
6794       }
6795       // Process condition of the if clause.
6796       if (CondVal) {
6797         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6798                                               CGF.Builder.getInt32(1));
6799       }
6800       return NumThreads;
6801     }
6802     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6803       return CGF.Builder.getInt32(1);
6804     return DefaultThreadLimitVal;
6805   }
6806   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6807                                : CGF.Builder.getInt32(0);
6808 }
6809 
6810 /// Emit the number of threads for a target directive.  Inspect the
6811 /// thread_limit clause associated with a teams construct combined or closely
6812 /// nested with the target directive.
6813 ///
6814 /// Emit the num_threads clause for directives such as 'target parallel' that
6815 /// have no associated teams construct.
6816 ///
6817 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' region: the relevant clauses live on whatever
    // directive is nested directly inside.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested thread_limit clause in the captured context of
        // the 'target' region, emitting its pre-init declarations first.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Captured-without-init variables still need storage/cleanups.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested non-distribute teams directive, descend one more level
      // to find the directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' to the directive inside it.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+parallel constructs carry the relevant clauses
    // themselves.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: serial execution, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based executable directives
  // and are rejected by the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7026 
7027 namespace {
7028 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7029 
7030 // Utility to handle information from clauses associated with a given
7031 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7032 // It provides a convenient interface to obtain the information and generate
7033 // code for that information.
7034 class MappableExprsHandler {
7035 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values look like they mirror the offloading
  /// runtime's map-type flags and must stay in sync with it — verify against
  /// the runtime's header before changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The bit offset of this field is computed by
    /// getFlagMemberOffset().
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7076 
7077   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7078   static unsigned getFlagMemberOffset() {
7079     unsigned Offset = 0;
7080     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7081          Remain = Remain >> 1)
7082       Offset++;
7083     return Offset;
7084   }
7085 
7086   /// Class that associates information with a base pointer to be passed to the
7087   /// runtime library.
7088   class BasePointerInfo {
7089     /// The base pointer.
7090     llvm::Value *Ptr = nullptr;
7091     /// The base declaration that refers to this device pointer, or null if
7092     /// there is none.
7093     const ValueDecl *DevPtrDecl = nullptr;
7094 
7095   public:
7096     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7097         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7098     llvm::Value *operator*() const { return Ptr; }
7099     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7100     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7101   };
7102 
7103   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7104   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7105   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7106 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest mapped field: (field index, address of that field).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped field: (field index, address of that field).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the whole struct the fields belong to.
    Address Base = Address::invalid();
  };
7118 
7119 private:
  /// Information gathered from a map-like clause for one mappable expression:
  /// the expression's components together with the map type/modifiers and
  /// whether a device pointer has to be returned for it.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must return the device pointer for this entry
    // (use_device_ptr clause).
    bool ReturnDevicePointer = false;
    // True if the mapping was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7137 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression whose emission is deferred.
    const Expr *IE = nullptr;
    // Declaration of the device pointer the expression refers to.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7148 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7169 
  /// Return the size in bytes of the object denoted by \p E, as an llvm value.
  /// For OpenMP array sections the size is derived from the section's length
  /// (or lower bound) rather than from the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size of the section: pointee size for pointers, element size
      // for arrays.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      // Section with an explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the base size: clamp negative results to 0.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7230 
7231   /// Return the corresponding bits for a given map clause modifier. Add
7232   /// a flag marking the map as a pointer if requested. Add a flag marking the
7233   /// map as the first one of a series of maps that relate to the same map
7234   /// expression.
7235   OpenMPOffloadMappingFlags getMapTypeBits(
7236       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7237       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7238     OpenMPOffloadMappingFlags Bits =
7239         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7240     switch (MapType) {
7241     case OMPC_MAP_alloc:
7242     case OMPC_MAP_release:
7243       // alloc and release is the default behavior in the runtime library,  i.e.
7244       // if we don't pass any bits alloc/release that is what the runtime is
7245       // going to do. Therefore, we don't need to signal anything for these two
7246       // type modifiers.
7247       break;
7248     case OMPC_MAP_to:
7249       Bits |= OMP_MAP_TO;
7250       break;
7251     case OMPC_MAP_from:
7252       Bits |= OMP_MAP_FROM;
7253       break;
7254     case OMPC_MAP_tofrom:
7255       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7256       break;
7257     case OMPC_MAP_delete:
7258       Bits |= OMP_MAP_DELETE;
7259       break;
7260     case OMPC_MAP_unknown:
7261       llvm_unreachable("Unexpected map type!");
7262     }
7263     if (AddPtrFlag)
7264       Bits |= OMP_MAP_PTR_AND_OBJ;
7265     if (AddIsTargetParamFlag)
7266       Bits |= OMP_MAP_TARGET_PARAM;
7267     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7268         != MapModifiers.end())
7269       Bits |= OMP_MAP_ALWAYS;
7270     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7271         != MapModifiers.end())
7272       Bits |= OMP_MAP_CLOSE;
7273     return Bits;
7274   }
7275 
7276   /// Return true if the provided expression is a final array section. A
7277   /// final array section, is one whose length can't be proved to be one.
7278   bool isFinalArraySectionExpression(const Expr *E) const {
7279     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7280 
7281     // It is not an array section and therefore not a unity-size one.
7282     if (!OASE)
7283       return false;
7284 
7285     // An array section with no colon always refer to a single element.
7286     if (OASE->getColonLoc().isInvalid())
7287       return false;
7288 
7289     const Expr *Length = OASE->getLength();
7290 
7291     // If we don't have a length we have to check if the array has size 1
7292     // for this dimension. Also, we should always expect a length if the
7293     // base type is pointer.
7294     if (!Length) {
7295       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7296                              OASE->getBase()->IgnoreParenImpCasts())
7297                              .getCanonicalType();
7298       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7299         return ATy->getSize().getSExtValue() != 1;
7300       // If we don't have a constant dimension length, we have to consider
7301       // the current section as having any size, so it is not necessarily
7302       // unitary. If it happen to be unity size, that's user fault.
7303       return true;
7304     }
7305 
7306     // Check if the length evaluates to 1.
7307     Expr::EvalResult Result;
7308     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7309       return true; // Can have more that size 1.
7310 
7311     llvm::APSInt ConstLength = Result.Val.getInt();
7312     return ConstLength.getSExtValue() != 1;
7313   }
7314 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType The kind of the map clause (to/from/tofrom/...).
  /// \param MapModifiers The map-type modifiers present on the clause.
  /// \param Components The component list; it is stored outermost-first and
  ///        is scanned here in reverse, i.e. from the base variable outwards.
  /// \param BasePointers [out] Base pointer of each generated entry.
  /// \param Pointers [out] Section (begin) pointer of each generated entry.
  /// \param Sizes [out] Size in bytes of each generated entry.
  /// \param Types [out] Map-type flag word of each generated entry.
  /// \param PartialStruct [out] Lowest/highest member info when only part of
  ///        a struct is mapped; used later to emit a combined entry.
  /// \param IsImplicit True if this mapping was not written by the user.
  /// \param OverlappedElements Component lists of sibling mappings that
  ///        overlap this one; when non-empty, only the non-overlapped pieces
  ///        of the base element are emitted.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (**) allocate contiguous space needed to fit all mapped members even if
    //      we allocate space for members not mapped (in this example,
    //      s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //      them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // An array access or section whose base is 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Walk the remaining components outwards; an entry is emitted whenever
    // the last component, a pointer, or a final array section is reached.
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing gap: from the end of the last overlapped
          // element up to one past the end of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        // Size in bytes of this component's type, used for the regular
        // (non-overlapped) entry below.
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7751 
7752   /// Return the adjusted map modifiers if the declaration a capture refers to
7753   /// appears in a first-private clause. This is expected to be used only with
7754   /// directives that start with 'target'.
7755   MappableExprsHandler::OpenMPOffloadMappingFlags
7756   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7757     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7758 
7759     // A first private variable captured by reference will use only the
7760     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7761     // declaration is known as first-private in this handler.
7762     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7763       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7764           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7765         return MappableExprsHandler::OMP_MAP_ALWAYS |
7766                MappableExprsHandler::OMP_MAP_TO;
7767       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7768         return MappableExprsHandler::OMP_MAP_TO |
7769                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7770       return MappableExprsHandler::OMP_MAP_PRIVATE |
7771              MappableExprsHandler::OMP_MAP_TO;
7772     }
7773     return MappableExprsHandler::OMP_MAP_TO |
7774            MappableExprsHandler::OMP_MAP_FROM;
7775   }
7776 
7777   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7778     // Rotate by getFlagMemberOffset() bits.
7779     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7780                                                   << getFlagMemberOffset());
7781   }
7782 
7783   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7784                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7785     // If the entry is PTR_AND_OBJ but has not been marked with the special
7786     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7787     // marked as MEMBER_OF.
7788     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7789         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7790       return;
7791 
7792     // Reset the placeholder value to prepare the flag for the assignment of the
7793     // proper MEMBER_OF value.
7794     Flags &= ~OMP_MAP_MEMBER_OF;
7795     Flags |= MemberOfFlag;
7796   }
7797 
7798   void getPlainLayout(const CXXRecordDecl *RD,
7799                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7800                       bool AsBase) const {
7801     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7802 
7803     llvm::StructType *St =
7804         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7805 
7806     unsigned NumElements = St->getNumElements();
7807     llvm::SmallVector<
7808         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7809         RecordLayout(NumElements);
7810 
7811     // Fill bases.
7812     for (const auto &I : RD->bases()) {
7813       if (I.isVirtual())
7814         continue;
7815       const auto *Base = I.getType()->getAsCXXRecordDecl();
7816       // Ignore empty bases.
7817       if (Base->isEmpty() || CGF.getContext()
7818                                  .getASTRecordLayout(Base)
7819                                  .getNonVirtualSize()
7820                                  .isZero())
7821         continue;
7822 
7823       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7824       RecordLayout[FieldIndex] = Base;
7825     }
7826     // Fill in virtual bases.
7827     for (const auto &I : RD->vbases()) {
7828       const auto *Base = I.getType()->getAsCXXRecordDecl();
7829       // Ignore empty bases.
7830       if (Base->isEmpty())
7831         continue;
7832       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7833       if (RecordLayout[FieldIndex])
7834         continue;
7835       RecordLayout[FieldIndex] = Base;
7836     }
7837     // Fill in all the fields.
7838     assert(!RD->isUnion() && "Unexpected union.");
7839     for (const auto *Field : RD->fields()) {
7840       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7841       // will fill in later.)
7842       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7843         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7844         RecordLayout[FieldIndex] = Field;
7845       }
7846     }
7847     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7848              &Data : RecordLayout) {
7849       if (Data.isNull())
7850         continue;
7851       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7852         getPlainLayout(Base, Layout, /*AsBase=*/true);
7853       else
7854         Layout.push_back(Data.get<const FieldDecl *>());
7855     }
7856   }
7857 
7858 public:
7859   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7860       : CurDir(&Dir), CGF(CGF) {
7861     // Extract firstprivate clause information.
7862     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7863       for (const auto *D : C->varlists())
7864         FirstPrivateDecls.try_emplace(
7865             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7866     // Extract device pointer clause information.
7867     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7868       for (auto L : C->component_lists())
7869         DevPointersMap[L.first].push_back(L.second);
7870   }
7871 
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor above, no clause information is
  /// collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7875 
7876   /// Generate code for the combined entry if we have a partially mapped struct
7877   /// and take care of the mapping flags of the arguments corresponding to
7878   /// individual struct members.
7879   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7880                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7881                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7882                          const StructRangeInfoTy &PartialStruct) const {
7883     // Base is the base of the struct
7884     BasePointers.push_back(PartialStruct.Base.getPointer());
7885     // Pointer is the address of the lowest element
7886     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7887     Pointers.push_back(LB);
7888     // Size is (addr of {highest+1} element) - (addr of lowest element)
7889     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7890     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7891     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7892     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7893     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7894     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7895                                                   /*isSigned=*/false);
7896     Sizes.push_back(Size);
7897     // Map type is always TARGET_PARAM
7898     Types.push_back(OMP_MAP_TARGET_PARAM);
7899     // Remove TARGET_PARAM flag from the first element
7900     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7901 
7902     // All other current entries will be MEMBER_OF the combined entry
7903     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7904     // 0xFFFF in the MEMBER_OF field).
7905     OpenMPOffloadMappingFlags MemberOfFlag =
7906         getMemberOfFlag(BasePointers.size() - 1);
7907     for (auto &M : CurTypes)
7908       setCorrectMemberOfFlag(M, MemberOfFlag);
7909   }
7910 
7911   /// Generate all the base pointers, section pointers, sizes and map
7912   /// types for the extracted mappable expressions. Also, for each item that
7913   /// relates with a device pointer, a pair of the relevant declaration and
7914   /// index where it occurs is appended to the device pointers info array.
7915   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7916                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7917                        MapFlagsArrayTy &Types) const {
7918     // We have to process the component lists that relate with the same
7919     // declaration in a single chunk so that we can generate the map flags
7920     // correctly. Therefore, we organize all lists in a map.
7921     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7922 
7923     // Helper function to fill the information map for the different supported
7924     // clauses.
7925     auto &&InfoGen = [&Info](
7926         const ValueDecl *D,
7927         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7928         OpenMPMapClauseKind MapType,
7929         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7930         bool ReturnDevicePointer, bool IsImplicit) {
7931       const ValueDecl *VD =
7932           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7933       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7934                             IsImplicit);
7935     };
7936 
7937     assert(CurDir.is<const OMPExecutableDirective *>() &&
7938            "Expect a executable directive");
7939     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7940     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7941       for (const auto &L : C->component_lists()) {
7942         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7943             /*ReturnDevicePointer=*/false, C->isImplicit());
7944       }
7945     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7946       for (const auto &L : C->component_lists()) {
7947         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7948             /*ReturnDevicePointer=*/false, C->isImplicit());
7949       }
7950     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7951       for (const auto &L : C->component_lists()) {
7952         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7953             /*ReturnDevicePointer=*/false, C->isImplicit());
7954       }
7955 
7956     // Look at the use_device_ptr clause information and mark the existing map
7957     // entries as such. If there is no map information for an entry in the
7958     // use_device_ptr list, we create one with map type 'alloc' and zero size
7959     // section. It is the user fault if that was not mapped before. If there is
7960     // no map information and the pointer is a struct member, then we defer the
7961     // emission of that entry until the whole struct has been processed.
7962     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7963         DeferredInfo;
7964 
7965     for (const auto *C :
7966          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7967       for (const auto &L : C->component_lists()) {
7968         assert(!L.second.empty() && "Not expecting empty list of components!");
7969         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7970         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7971         const Expr *IE = L.second.back().getAssociatedExpression();
7972         // If the first component is a member expression, we have to look into
7973         // 'this', which maps to null in the map of map information. Otherwise
7974         // look directly for the information.
7975         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7976 
7977         // We potentially have map information for this declaration already.
7978         // Look for the first set of components that refer to it.
7979         if (It != Info.end()) {
7980           auto CI = std::find_if(
7981               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7982                 return MI.Components.back().getAssociatedDeclaration() == VD;
7983               });
7984           // If we found a map entry, signal that the pointer has to be returned
7985           // and move on to the next declaration.
7986           if (CI != It->second.end()) {
7987             CI->ReturnDevicePointer = true;
7988             continue;
7989           }
7990         }
7991 
7992         // We didn't find any match in our map information - generate a zero
7993         // size array section - if the pointer is a struct member we defer this
7994         // action until the whole struct has been processed.
7995         if (isa<MemberExpr>(IE)) {
7996           // Insert the pointer into Info to be processed by
7997           // generateInfoForComponentList. Because it is a member pointer
7998           // without a pointee, no entry will be generated for it, therefore
7999           // we need to generate one after the whole struct has been processed.
8000           // Nonetheless, generateInfoForComponentList must be called to take
8001           // the pointer into account for the calculation of the range of the
8002           // partial struct.
8003           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8004                   /*ReturnDevicePointer=*/false, C->isImplicit());
8005           DeferredInfo[nullptr].emplace_back(IE, VD);
8006         } else {
8007           llvm::Value *Ptr =
8008               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8009           BasePointers.emplace_back(Ptr, VD);
8010           Pointers.push_back(Ptr);
8011           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8012           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8013         }
8014       }
8015     }
8016 
8017     for (const auto &M : Info) {
8018       // We need to know when we generate information for the first component
8019       // associated with a capture, because the mapping flags depend on it.
8020       bool IsFirstComponentList = true;
8021 
8022       // Temporary versions of arrays
8023       MapBaseValuesArrayTy CurBasePointers;
8024       MapValuesArrayTy CurPointers;
8025       MapValuesArrayTy CurSizes;
8026       MapFlagsArrayTy CurTypes;
8027       StructRangeInfoTy PartialStruct;
8028 
8029       for (const MapInfo &L : M.second) {
8030         assert(!L.Components.empty() &&
8031                "Not expecting declaration with no component lists.");
8032 
8033         // Remember the current base pointer index.
8034         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8035         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8036                                      CurBasePointers, CurPointers, CurSizes,
8037                                      CurTypes, PartialStruct,
8038                                      IsFirstComponentList, L.IsImplicit);
8039 
8040         // If this entry relates with a device pointer, set the relevant
8041         // declaration and add the 'return pointer' flag.
8042         if (L.ReturnDevicePointer) {
8043           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8044                  "Unexpected number of mapped base pointers.");
8045 
8046           const ValueDecl *RelevantVD =
8047               L.Components.back().getAssociatedDeclaration();
8048           assert(RelevantVD &&
8049                  "No relevant declaration related with device pointer??");
8050 
8051           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8052           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8053         }
8054         IsFirstComponentList = false;
8055       }
8056 
8057       // Append any pending zero-length pointers which are struct members and
8058       // used with use_device_ptr.
8059       auto CI = DeferredInfo.find(M.first);
8060       if (CI != DeferredInfo.end()) {
8061         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8062           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8063           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8064               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8065           CurBasePointers.emplace_back(BasePtr, L.VD);
8066           CurPointers.push_back(Ptr);
8067           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8068           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8069           // value MEMBER_OF=FFFF so that the entry is later updated with the
8070           // correct value of MEMBER_OF.
8071           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8072                              OMP_MAP_MEMBER_OF);
8073         }
8074       }
8075 
8076       // If there is an entry in PartialStruct it means we have a struct with
8077       // individual members mapped. Emit an extra combined entry.
8078       if (PartialStruct.Base.isValid())
8079         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8080                           PartialStruct);
8081 
8082       // We need to append the results of this capture to what we already have.
8083       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8084       Pointers.append(CurPointers.begin(), CurPointers.end());
8085       Sizes.append(CurSizes.begin(), CurSizes.end());
8086       Types.append(CurTypes.begin(), CurTypes.end());
8087     }
8088   }
8089 
  /// Generate all the base pointers, section pointers, sizes and map types for
  /// the extracted map clauses of user-defined mapper.
  ///
  /// \param BasePointers [out] Receives the base pointer of each generated
  ///        map entry.
  /// \param Pointers [out] Receives the section pointer of each entry.
  /// \param Sizes [out] Receives the size (in bytes) of each entry.
  /// \param Types [out] Receives the map-type flag word of each entry.
  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                                MapValuesArrayTy &Pointers,
                                MapValuesArrayTy &Sizes,
                                MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. MapVector is used
    // so iteration below visits declarations in insertion order.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Component lists are keyed by the canonical declaration (or by
    // nullptr for 'this'-based expressions).
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // A declare mapper directive only carries map clauses; collect all of
    // their component lists.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto &L : MC->component_lists()) {
        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, MC->isImplicit());
      }
    }

    // Emit map information, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8161 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// Only applies when \p VD is (a reference to) a lambda closure object: one
  /// PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT entry is emitted for the 'this'
  /// capture (if any) and for each by-reference (or pointer-typed) variable
  /// capture. \p LambdaPointers records, for each emitted field address, the
  /// address of the enclosing closure so adjustMemberOfForLambdaCaptures can
  /// later patch the MEMBER_OF index.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Bail out unless the captured entity is a lambda closure.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: base is the address of the closure
      // field, pointer is the captured 'this' value stored in it.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: shadows the outer VD (the closure decl) with the captured var.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarLValVal.getPointer());
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: map the loaded pointer value, zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8222 
8223   /// Set correct indices for lambdas captures.
8224   void adjustMemberOfForLambdaCaptures(
8225       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8226       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8227       MapFlagsArrayTy &Types) const {
8228     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8229       // Set correct member_of idx for all implicit lambda captures.
8230       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8231                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8232         continue;
8233       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8234       assert(BasePtr && "Unable to find base lambda address.");
8235       int TgtIdx = -1;
8236       for (unsigned J = I; J > 0; --J) {
8237         unsigned Idx = J - 1;
8238         if (Pointers[Idx] != BasePtr)
8239           continue;
8240         TgtIdx = Idx;
8241         break;
8242       }
8243       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8244       // All other current entries will be MEMBER_OF the combined entry
8245       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8246       // 0xFFFF in the MEMBER_OF field).
8247       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8248       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8249     }
8250   }
8251 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \param Cap The capture to emit map information for (must not be a VLA
  ///        capture).
  /// \param Arg The IR value holding the captured entity.
  /// \param BasePointers/Pointers/Sizes/Types [out] Map-entry arrays appended
  ///        to by this function.
  /// \param PartialStruct [out] Range information when individual members of
  ///        a struct are mapped; the caller emits the combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Use null as the lookup key for the 'this' capture; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every map-clause component list that refers to this capture.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two component lists overlap when, walking both from the base outwards,
    // one list is exhausted first: the shorter one is the "base" and the
    // longer one is recorded as overlapping it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare against every later list only (pairs are symmetric).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed here; the remaining fields are scratch.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Advance while both lists refer to the same expression kind and the
        // same variable/field.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. The field layout of the
    // record is used as the ordering key so members are emitted in
    // declaration order.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the diverging fields in the
            // record layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8429 
8430   /// Generate the base pointers, section pointers, sizes and map types
8431   /// associated with the declare target link variables.
8432   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8433                                         MapValuesArrayTy &Pointers,
8434                                         MapValuesArrayTy &Sizes,
8435                                         MapFlagsArrayTy &Types) const {
8436     assert(CurDir.is<const OMPExecutableDirective *>() &&
8437            "Expect a executable directive");
8438     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8439     // Map other list items in the map clause which are not captured variables
8440     // but "declare target link" global variables.
8441     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8442       for (const auto &L : C->component_lists()) {
8443         if (!L.first)
8444           continue;
8445         const auto *VD = dyn_cast<VarDecl>(L.first);
8446         if (!VD)
8447           continue;
8448         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8449             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8450         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8451             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8452           continue;
8453         StructRangeInfoTy PartialStruct;
8454         generateInfoForComponentList(
8455             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8456             Pointers, Sizes, Types, PartialStruct,
8457             /*IsFirstComponentList=*/true, C->isImplicit());
8458         assert(!PartialStruct.Base.isValid() &&
8459                "No partial structs for declare target link expected.");
8460       }
8461     }
8462   }
8463 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of the Cur* arrays; the entry is
  /// always marked OMP_MAP_TARGET_PARAM and, unless the capture comes from an
  /// explicit firstprivate clause, OMP_MAP_IMPLICIT.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object tofrom.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides the implicitness flag.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: materialize a global copy once and map that
        // instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: map the pointee, so load the pointer value
          // through the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8547 };
8548 } // anonymous namespace
8549 
8550 /// Emit the arrays used to pass the captures and map information to the
8551 /// offloading runtime library. If there is no map or capture information,
8552 /// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    // (e.g. a VLA capture, whose size is only known at run time.)
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // The base-pointer and pointer arrays must be filled at run time, so
    // emit them as function-local temporaries of void* elements.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is only known at run time: emit a local array to
      // be filled in the per-pointer loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer arrays (and, if needed, the sizes
    // array) with one store per captured entity.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Store the base pointer into element I of the base-pointers array.
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where this capture's base pointer was stored so the device
      // address can be retrieved later (use_device_ptr support).
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Store the begin pointer into element I of the pointers array.
      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        // Store the size (sign-extended to i64) into element I of the sizes
        // array.
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8668 
8669 /// Emit the arguments to be passed to the runtime library based on the
8670 /// arrays of pointers, sizes and map types.
8671 static void emitOffloadingArraysArgument(
8672     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8673     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8674     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8675   CodeGenModule &CGM = CGF.CGM;
8676   if (Info.NumberOfPtrs) {
8677     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8679         Info.BasePointersArray,
8680         /*Idx0=*/0, /*Idx1=*/0);
8681     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8682         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8683         Info.PointersArray,
8684         /*Idx0=*/0,
8685         /*Idx1=*/0);
8686     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8687         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8688         /*Idx0=*/0, /*Idx1=*/0);
8689     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8690         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8691         Info.MapTypesArray,
8692         /*Idx0=*/0,
8693         /*Idx1=*/0);
8694   } else {
8695     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8696     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8697     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8698     MapTypesArrayArg =
8699         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8700   }
8701 }
8702 
8703 /// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Look through the captured statement of \p D for a single child that is
  // itself an OpenMP directive.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // A 'target' region may hold the 'distribute' directly, or behind an
      // intervening 'teams' directive; in the latter case look one level
      // deeper.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // A 'target teams' region may hold the 'distribute' directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot legally contain a nested 'distribute'.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_requires:
    case OMPD_unknown:
      // All remaining kinds either already contain 'distribute' in their
      // composite name or are not target directives, so this query should
      // never be made for them.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8802 
8803 /// Emit the user-defined mapper function. The code generation follows the
8804 /// pattern in the example below.
8805 /// \code
8806 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8807 ///                                           void *base, void *begin,
8808 ///                                           int64_t size, int64_t type) {
8809 ///   // Allocate space for an array section first.
8810 ///   if (size > 1 && !maptype.IsDelete)
8811 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8812 ///                                 size*sizeof(Ty), clearToFrom(type));
8813 ///   // Map members.
8814 ///   for (unsigned i = 0; i < size; i++) {
8815 ///     // For each component specified by this mapper:
8816 ///     for (auto c : all_components) {
8817 ///       if (c.hasMapper())
8818 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8819 ///                       c.arg_type);
8820 ///       else
8821 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8822 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8823 ///     }
8824 ///   }
8825 ///   // Delete the array section.
8826 ///   if (size > 1 && maptype.IsDelete)
8827 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8828 ///                                 size*sizeof(Ty), clearToFrom(type));
8829 /// }
8830 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper function only once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The mapper walks the array through a restrict-qualified pointer to the
  // mapped type.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so that inside the loop it refers to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function after the mangled mapped type and the mapper id:
  // .omp_mapper.<type_name>.<mapper_id>.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // The current element pointer is a PHI: PtrBegin on entry, incremented at
  // the bottom of the loop.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // Merge the four decayed map types; the fourth incoming edge (tofrom)
    // comes from ToElseBB with the member map type unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the new mapper function, and (if requested by the caller) the
  // declaration it was emitted for, so it is registered exactly once.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9080 
9081 /// Emit the array initialization or deletion portion for user-defined mapper
9082 /// code generation. First, it evaluates whether an array section is mapped and
9083 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9084 /// true, and \a MapType indicates to not delete this array, array
9085 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
9087 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9088     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9089     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9090     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9091   StringRef Prefix = IsInit ? ".init" : ".del";
9092 
9093   // Evaluate if this is an array section.
9094   llvm::BasicBlock *IsDeleteBB =
9095       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9096   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9097   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9098       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9099   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9100 
9101   // Evaluate if we are going to delete this section.
9102   MapperCGF.EmitBlock(IsDeleteBB);
9103   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9104       MapType,
9105       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9106   llvm::Value *DeleteCond;
9107   if (IsInit) {
9108     DeleteCond = MapperCGF.Builder.CreateIsNull(
9109         DeleteBit, "omp.array" + Prefix + ".delete");
9110   } else {
9111     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9112         DeleteBit, "omp.array" + Prefix + ".delete");
9113   }
9114   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9115 
9116   MapperCGF.EmitBlock(BodyBB);
9117   // Get the array size by multiplying element size and element number (i.e., \p
9118   // Size).
9119   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9120       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9121   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9122   // memory allocation/deletion purpose only.
9123   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9124       MapType,
9125       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9126                                    MappableExprsHandler::OMP_MAP_FROM)));
9127   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9128   // data structure.
9129   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9130   MapperCGF.EmitRuntimeCall(
9131       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9132 }
9133 
9134 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9135     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9136     llvm::Value *DeviceID,
9137     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9138                                      const OMPLoopDirective &D)>
9139         SizeEmitter) {
9140   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9141   const OMPExecutableDirective *TD = &D;
9142   // Get nested teams distribute kind directive, if any.
9143   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9144     TD = getNestedDistributeDirective(CGM.getContext(), D);
9145   if (!TD)
9146     return;
9147   const auto *LD = cast<OMPLoopDirective>(TD);
9148   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9149                                                      PrePostActionTy &) {
9150     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9151       llvm::Value *Args[] = {DeviceID, NumIterations};
9152       CGF.EmitRuntimeCall(
9153           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9154     }
9155   };
9156   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9157 }
9158 
9159 void CGOpenMPRuntime::emitTargetCall(
9160     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9161     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9162     const Expr *Device,
9163     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9164                                      const OMPLoopDirective &D)>
9165         SizeEmitter) {
9166   if (!CGF.HaveInsertPoint())
9167     return;
9168 
9169   assert(OutlinedFn && "Invalid outlined function!");
9170 
9171   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9172   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9173   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9174   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9175                                             PrePostActionTy &) {
9176     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9177   };
9178   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9179 
9180   CodeGenFunction::OMPTargetDataInfo InputInfo;
9181   llvm::Value *MapTypesArray = nullptr;
9182   // Fill up the pointer arrays and transfer execution to the device.
9183   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9184                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9185                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9186     // On top of the arrays that were filled up, the target offloading call
9187     // takes as arguments the device id as well as the host pointer. The host
9188     // pointer is used by the runtime library to identify the current target
9189     // region, so it only has to be unique and not necessarily point to
9190     // anything. It could be the pointer to the outlined function that
9191     // implements the target region, but we aren't using that so that the
9192     // compiler doesn't need to keep that, and could therefore inline the host
9193     // function if proven worthwhile during optimization.
9194 
9195     // From this point on, we need to have an ID of the target region defined.
9196     assert(OutlinedFnID && "Invalid outlined function ID!");
9197 
9198     // Emit device ID if any.
9199     llvm::Value *DeviceID;
9200     if (Device) {
9201       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9202                                            CGF.Int64Ty, /*isSigned=*/true);
9203     } else {
9204       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9205     }
9206 
9207     // Emit the number of elements in the offloading arrays.
9208     llvm::Value *PointerNum =
9209         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9210 
9211     // Return value of the runtime offloading call.
9212     llvm::Value *Return;
9213 
9214     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9215     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9216 
9217     // Emit tripcount for the target loop-based directive.
9218     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9219 
9220     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9221     // The target region is an outlined function launched by the runtime
9222     // via calls __tgt_target() or __tgt_target_teams().
9223     //
9224     // __tgt_target() launches a target region with one team and one thread,
9225     // executing a serial region.  This master thread may in turn launch
9226     // more threads within its team upon encountering a parallel region,
9227     // however, no additional teams can be launched on the device.
9228     //
9229     // __tgt_target_teams() launches a target region with one or more teams,
9230     // each with one or more threads.  This call is required for target
9231     // constructs such as:
9232     //  'target teams'
9233     //  'target' / 'teams'
9234     //  'target teams distribute parallel for'
9235     //  'target parallel'
9236     // and so on.
9237     //
9238     // Note that on the host and CPU targets, the runtime implementation of
9239     // these calls simply call the outlined function without forking threads.
9240     // The outlined functions themselves have runtime calls to
9241     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9242     // the compiler in emitTeamsCall() and emitParallelCall().
9243     //
9244     // In contrast, on the NVPTX target, the implementation of
9245     // __tgt_target_teams() launches a GPU kernel with the requested number
9246     // of teams and threads so no additional calls to the runtime are required.
9247     if (NumTeams) {
9248       // If we have NumTeams defined this means that we have an enclosed teams
9249       // region. Therefore we also expect to have NumThreads defined. These two
9250       // values should be defined in the presence of a teams directive,
9251       // regardless of having any clauses associated. If the user is using teams
9252       // but no clauses, these two values will be the default that should be
9253       // passed to the runtime library - a 32-bit integer with the value zero.
9254       assert(NumThreads && "Thread limit expression should be available along "
9255                            "with number of teams.");
9256       llvm::Value *OffloadingArgs[] = {DeviceID,
9257                                        OutlinedFnID,
9258                                        PointerNum,
9259                                        InputInfo.BasePointersArray.getPointer(),
9260                                        InputInfo.PointersArray.getPointer(),
9261                                        InputInfo.SizesArray.getPointer(),
9262                                        MapTypesArray,
9263                                        NumTeams,
9264                                        NumThreads};
9265       Return = CGF.EmitRuntimeCall(
9266           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9267                                           : OMPRTL__tgt_target_teams),
9268           OffloadingArgs);
9269     } else {
9270       llvm::Value *OffloadingArgs[] = {DeviceID,
9271                                        OutlinedFnID,
9272                                        PointerNum,
9273                                        InputInfo.BasePointersArray.getPointer(),
9274                                        InputInfo.PointersArray.getPointer(),
9275                                        InputInfo.SizesArray.getPointer(),
9276                                        MapTypesArray};
9277       Return = CGF.EmitRuntimeCall(
9278           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9279                                           : OMPRTL__tgt_target),
9280           OffloadingArgs);
9281     }
9282 
9283     // Check the error code and execute the host version if required.
9284     llvm::BasicBlock *OffloadFailedBlock =
9285         CGF.createBasicBlock("omp_offload.failed");
9286     llvm::BasicBlock *OffloadContBlock =
9287         CGF.createBasicBlock("omp_offload.cont");
9288     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9289     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9290 
9291     CGF.EmitBlock(OffloadFailedBlock);
9292     if (RequiresOuterTask) {
9293       CapturedVars.clear();
9294       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9295     }
9296     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9297     CGF.EmitBranch(OffloadContBlock);
9298 
9299     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9300   };
9301 
9302   // Notify that the host version must be executed.
9303   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9304                     RequiresOuterTask](CodeGenFunction &CGF,
9305                                        PrePostActionTy &) {
9306     if (RequiresOuterTask) {
9307       CapturedVars.clear();
9308       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9309     }
9310     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9311   };
9312 
9313   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9314                           &CapturedVars, RequiresOuterTask,
9315                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9316     // Fill up the arrays with all the captured variables.
9317     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9318     MappableExprsHandler::MapValuesArrayTy Pointers;
9319     MappableExprsHandler::MapValuesArrayTy Sizes;
9320     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9321 
9322     // Get mappable expression information.
9323     MappableExprsHandler MEHandler(D, CGF);
9324     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9325 
9326     auto RI = CS.getCapturedRecordDecl()->field_begin();
9327     auto CV = CapturedVars.begin();
9328     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9329                                               CE = CS.capture_end();
9330          CI != CE; ++CI, ++RI, ++CV) {
9331       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9332       MappableExprsHandler::MapValuesArrayTy CurPointers;
9333       MappableExprsHandler::MapValuesArrayTy CurSizes;
9334       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9335       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9336 
9337       // VLA sizes are passed to the outlined region by copy and do not have map
9338       // information associated.
9339       if (CI->capturesVariableArrayType()) {
9340         CurBasePointers.push_back(*CV);
9341         CurPointers.push_back(*CV);
9342         CurSizes.push_back(CGF.Builder.CreateIntCast(
9343             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9344         // Copy to the device as an argument. No need to retrieve it.
9345         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9346                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9347                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9348       } else {
9349         // If we have any information in the map clause, we use it, otherwise we
9350         // just do a default mapping.
9351         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9352                                          CurSizes, CurMapTypes, PartialStruct);
9353         if (CurBasePointers.empty())
9354           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9355                                            CurPointers, CurSizes, CurMapTypes);
9356         // Generate correct mapping for variables captured by reference in
9357         // lambdas.
9358         if (CI->capturesVariable())
9359           MEHandler.generateInfoForLambdaCaptures(
9360               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9361               CurMapTypes, LambdaPointers);
9362       }
9363       // We expect to have at least an element of information for this capture.
9364       assert(!CurBasePointers.empty() &&
9365              "Non-existing map pointer for capture!");
9366       assert(CurBasePointers.size() == CurPointers.size() &&
9367              CurBasePointers.size() == CurSizes.size() &&
9368              CurBasePointers.size() == CurMapTypes.size() &&
9369              "Inconsistent map information sizes!");
9370 
9371       // If there is an entry in PartialStruct it means we have a struct with
9372       // individual members mapped. Emit an extra combined entry.
9373       if (PartialStruct.Base.isValid())
9374         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9375                                     CurMapTypes, PartialStruct);
9376 
9377       // We need to append the results of this capture to what we already have.
9378       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9379       Pointers.append(CurPointers.begin(), CurPointers.end());
9380       Sizes.append(CurSizes.begin(), CurSizes.end());
9381       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9382     }
9383     // Adjust MEMBER_OF flags for the lambdas captures.
9384     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9385                                               Pointers, MapTypes);
9386     // Map other list items in the map clause which are not captured variables
9387     // but "declare target link" global variables.
9388     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9389                                                MapTypes);
9390 
9391     TargetDataInfo Info;
9392     // Fill up the arrays and create the arguments.
9393     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9394     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9395                                  Info.PointersArray, Info.SizesArray,
9396                                  Info.MapTypesArray, Info);
9397     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9398     InputInfo.BasePointersArray =
9399         Address(Info.BasePointersArray, CGM.getPointerAlign());
9400     InputInfo.PointersArray =
9401         Address(Info.PointersArray, CGM.getPointerAlign());
9402     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9403     MapTypesArray = Info.MapTypesArray;
9404     if (RequiresOuterTask)
9405       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9406     else
9407       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9408   };
9409 
9410   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9411                              CodeGenFunction &CGF, PrePostActionTy &) {
9412     if (RequiresOuterTask) {
9413       CodeGenFunction::OMPTargetDataInfo InputInfo;
9414       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9415     } else {
9416       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9417     }
9418   };
9419 
9420   // If we have a target function ID it means that we need to support
9421   // offloading, otherwise, just execute on the host. We need to execute on host
9422   // regardless of the conditional in the if clause if, e.g., the user do not
9423   // specify target triples.
9424   if (OutlinedFnID) {
9425     if (IfCond) {
9426       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9427     } else {
9428       RegionCodeGenTy ThenRCG(TargetThenGen);
9429       ThenRCG(CGF);
9430     }
9431   } else {
9432     RegionCodeGenTy ElseRCG(TargetElseGen);
9433     ElseRCG(CGF);
9434   }
9435 }
9436 
/// Recursively scan \p S for OpenMP target execution directives and emit the
/// corresponding device functions (kernels) for any that are found.
/// \p ParentName is the mangled name of the enclosing host function; it is
/// part of the unique offload-entry identity used to pair host and device
/// entries.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies this target region
    // across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // The remaining directive kinds cannot reach this switch: the
    // RequiresDeviceCodegen check above guarantees the directive is a target
    // execution directive. The exhaustive list keeps -Wswitch useful when a
    // new directive kind is added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    // Non-target executable directive: recurse into its associated statement,
    // if any, since target regions may be nested inside.
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9578 
9579 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9580   // If emitting code for the host, we do not process FD here. Instead we do
9581   // the normal code generation.
9582   if (!CGM.getLangOpts().OpenMPIsDevice) {
9583     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9584       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9585           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9586       // Do not emit device_type(nohost) functions for the host.
9587       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9588         return true;
9589     }
9590     return false;
9591   }
9592 
9593   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9594   StringRef Name = CGM.getMangledName(GD);
9595   // Try to detect target regions in the function.
9596   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9597     scanForTargetRegionsFunctions(FD->getBody(), Name);
9598     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9599         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9600     // Do not emit device_type(nohost) functions for the host.
9601     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9602       return true;
9603   }
9604 
9605   // Do not to emit function if it is not marked as declare target.
9606   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9607          AlreadyEmittedTargetFunctions.count(Name) == 0;
9608 }
9609 
9610 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9611   if (!CGM.getLangOpts().OpenMPIsDevice)
9612     return false;
9613 
9614   // Check if there are Ctors/Dtors in this declaration and look for target
9615   // regions in it. We use the complete variant to produce the kernel name
9616   // mangling.
9617   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9618   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9619     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9620       StringRef ParentName =
9621           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9622       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9623     }
9624     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9625       StringRef ParentName =
9626           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9627       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9628     }
9629   }
9630 
9631   // Do not to emit variable if it is not marked as declare target.
9632   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9633       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9634           cast<VarDecl>(GD.getDecl()));
9635   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9636       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9637        HasRequiresUnifiedSharedMemory)) {
9638     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9639     return true;
9640   }
9641   return false;
9642 }
9643 
9644 llvm::Constant *
9645 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9646                                                 const VarDecl *VD) {
9647   assert(VD->getType().isConstant(CGM.getContext()) &&
9648          "Expected constant variable.");
9649   StringRef VarName;
9650   llvm::Constant *Addr;
9651   llvm::GlobalValue::LinkageTypes Linkage;
9652   QualType Ty = VD->getType();
9653   SmallString<128> Buffer;
9654   {
9655     unsigned DeviceID;
9656     unsigned FileID;
9657     unsigned Line;
9658     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9659                              FileID, Line);
9660     llvm::raw_svector_ostream OS(Buffer);
9661     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9662        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9663     VarName = OS.str();
9664   }
9665   Linkage = llvm::GlobalValue::InternalLinkage;
9666   Addr =
9667       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9668                                   getDefaultFirstprivateAddressSpace());
9669   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9670   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9671   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9672   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9673       VarName, Addr, VarSize,
9674       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9675   return Addr;
9676 }
9677 
/// Record the declare-target (or device-emitted) global variable \p VD with
/// address \p Addr in the offload-entries table so the runtime can pair the
/// host and device copies.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to do when no device triples were given and this is the host
  // compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to' without unified shared memory: register the variable itself.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only variable: the size is unknown in this TU.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal, compiler-used constant that stores the variable's
      // address so the optimizer cannot eliminate the internal variable.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry carries only the name, no address.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host register the pointer-sized declare-target indirection
      // variable instead of the variable itself.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9748 
9749 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9750   if (isa<FunctionDecl>(GD.getDecl()) ||
9751       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9752     return emitTargetFunctions(GD);
9753 
9754   return emitTargetGlobalVariable(GD);
9755 }
9756 
9757 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9758   for (const VarDecl *VD : DeferredGlobalVariables) {
9759     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9760         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9761     if (!Res)
9762       continue;
9763     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9764         !HasRequiresUnifiedSharedMemory) {
9765       CGM.EmitGlobal(VD);
9766     } else {
9767       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9768               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9769                HasRequiresUnifiedSharedMemory)) &&
9770              "Expected link clause or to clause with unified memory.");
9771       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9772     }
9773   }
9774 }
9775 
9776 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9777     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9778   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9779          " Expected target-based directive.");
9780 }
9781 
9782 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9783     const OMPRequiresDecl *D) {
9784   for (const OMPClause *Clause : D->clauselists()) {
9785     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9786       HasRequiresUnifiedSharedMemory = true;
9787       break;
9788     }
9789   }
9790 }
9791 
9792 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9793                                                        LangAS &AS) {
9794   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9795     return false;
9796   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9797   switch(A->getAllocatorType()) {
9798   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9799   // Not supported, fallback to the default mem space.
9800   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9801   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9802   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9803   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9804   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9805   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9806   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9807     AS = LangAS::Default;
9808     return true;
9809   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9810     llvm_unreachable("Expected predefined allocator for the variables with the "
9811                      "static storage.");
9812   }
9813   return false;
9814 }
9815 
/// Returns true if a 'requires unified_shared_memory' clause was recorded
/// for this compilation unit (see checkArchForUnifiedAddressing).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9819 
9820 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9821     CodeGenModule &CGM)
9822     : CGM(CGM) {
9823   if (CGM.getLangOpts().OpenMPIsDevice) {
9824     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9825     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9826   }
9827 }
9828 
9829 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9830   if (CGM.getLangOpts().OpenMPIsDevice)
9831     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9832 }
9833 
9834 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9835   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9836     return true;
9837 
9838   StringRef Name = CGM.getMangledName(GD);
9839   const auto *D = cast<FunctionDecl>(GD.getDecl());
9840   // Do not to emit function if it is marked as declare target as it was already
9841   // emitted.
9842   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9843     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9844       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9845         return !F->isDeclaration();
9846       return false;
9847     }
9848     return true;
9849   }
9850 
9851   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9852 }
9853 
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  // Bail out when: there are no target triples, we are in simd-only mode,
  // we are compiling the device side, or no target/declare-target region
  // (and no offload entry) has been emitted in this translation unit.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  // The function body calls __tgt_register_requires with the accumulated
  // requires-clause flags; the caller is expected to arrange for it to run
  // at program startup (it is created via the global init/destruct helper).
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9894 
9895 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9896                                     const OMPExecutableDirective &D,
9897                                     SourceLocation Loc,
9898                                     llvm::Function *OutlinedFn,
9899                                     ArrayRef<llvm::Value *> CapturedVars) {
9900   if (!CGF.HaveInsertPoint())
9901     return;
9902 
9903   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9904   CodeGenFunction::RunCleanupsScope Scope(CGF);
9905 
9906   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9907   llvm::Value *Args[] = {
9908       RTLoc,
9909       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9910       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9911   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9912   RealArgs.append(std::begin(Args), std::end(Args));
9913   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9914 
9915   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9916   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9917 }
9918 
9919 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9920                                          const Expr *NumTeams,
9921                                          const Expr *ThreadLimit,
9922                                          SourceLocation Loc) {
9923   if (!CGF.HaveInsertPoint())
9924     return;
9925 
9926   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9927 
9928   llvm::Value *NumTeamsVal =
9929       NumTeams
9930           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9931                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9932           : CGF.Builder.getInt32(0);
9933 
9934   llvm::Value *ThreadLimitVal =
9935       ThreadLimit
9936           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9937                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9938           : CGF.Builder.getInt32(0);
9939 
9940   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9941   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9942                                      ThreadLimitVal};
9943   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9944                       PushNumTeamsArgs);
9945 }
9946 
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments. Info is captured by
    // reference so EndThenGen below sees the same arrays.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. When no device clause is present, pass the
    // "undefined device" sentinel so the runtime picks the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // BeginThenGen must have run first and filled in Info.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally, if an if clause is present).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (conditionally, matching the opening).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10073 
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only the standalone data-movement directives are handled here.
  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are captured by reference: TargetThenGen fills
  // them in before ThenGen (possibly running inside a task) reads them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any. With no device clause, pass the "undefined
    // device" sentinel so the runtime picks the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. The nowait variants are used when a 'nowait' clause is
    // present. All other directive kinds are listed explicitly (instead of
    // a default case) so new directives trigger a compile-time warning.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Prepare the offloading arrays, then emit ThenGen either inline or as a
  // task-based directive (when 'depend' clauses are present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array addresses through the captured InputInfo /
    // MapTypesArray so ThenGen can build the runtime-call arguments.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause that evaluates to false means no data movement at all.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10230 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Mangling classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    // For Linear: the step value (printed after 'l' when mangling).
    // For LinearWithVarStride: the stride (printed after 's').
    llvm::APSInt StrideOrArg;
    // Alignment printed after 'a' in the mangled name; zero means
    // no alignment token is emitted.
    llvm::APSInt Alignment;
  };
} // namespace
10241 
10242 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10243                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10244   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10245   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10246   // of that clause. The VLEN value must be power of 2.
10247   // In other case the notion of the function`s "characteristic data type" (CDT)
10248   // is used to compute the vector length.
10249   // CDT is defined in the following order:
10250   //   a) For non-void function, the CDT is the return type.
10251   //   b) If the function has any non-uniform, non-linear parameters, then the
10252   //   CDT is the type of the first such parameter.
10253   //   c) If the CDT determined by a) or b) above is struct, union, or class
10254   //   type which is pass-by-value (except for the type that maps to the
10255   //   built-in complex data type), the characteristic data type is int.
10256   //   d) If none of the above three cases is applicable, the CDT is int.
10257   // The VLEN is then determined based on the CDT and the size of vector
10258   // register of that ISA for which current vector version is generated. The
10259   // VLEN is computed using the formula below:
10260   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10261   // where vector register size specified in section 3.2.1 Registers and the
10262   // Stack Frame of original AMD64 ABI document.
10263   QualType RetType = FD->getReturnType();
10264   if (RetType.isNull())
10265     return 0;
10266   ASTContext &C = FD->getASTContext();
10267   QualType CDT;
10268   if (!RetType.isNull() && !RetType->isVoidType()) {
10269     CDT = RetType;
10270   } else {
10271     unsigned Offset = 0;
10272     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10273       if (ParamAttrs[Offset].Kind == Vector)
10274         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10275       ++Offset;
10276     }
10277     if (CDT.isNull()) {
10278       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10279         if (ParamAttrs[I + Offset].Kind == Vector) {
10280           CDT = FD->getParamDecl(I)->getType();
10281           break;
10282         }
10283       }
10284     }
10285   }
10286   if (CDT.isNull())
10287     CDT = C.IntTy;
10288   CDT = CDT->getCanonicalTypeUnqualified();
10289   if (CDT->isRecordType() || CDT->isUnionType())
10290     CDT = C.IntTy;
10291   return C.getTypeSize(CDT);
10292 }
10293 
10294 static void
10295 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10296                            const llvm::APSInt &VLENVal,
10297                            ArrayRef<ParamAttrTy> ParamAttrs,
10298                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10299   struct ISADataTy {
10300     char ISA;
10301     unsigned VecRegSize;
10302   };
10303   ISADataTy ISAData[] = {
10304       {
10305           'b', 128
10306       }, // SSE
10307       {
10308           'c', 256
10309       }, // AVX
10310       {
10311           'd', 256
10312       }, // AVX2
10313       {
10314           'e', 512
10315       }, // AVX512
10316   };
10317   llvm::SmallVector<char, 2> Masked;
10318   switch (State) {
10319   case OMPDeclareSimdDeclAttr::BS_Undefined:
10320     Masked.push_back('N');
10321     Masked.push_back('M');
10322     break;
10323   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10324     Masked.push_back('N');
10325     break;
10326   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10327     Masked.push_back('M');
10328     break;
10329   }
10330   for (char Mask : Masked) {
10331     for (const ISADataTy &Data : ISAData) {
10332       SmallString<256> Buffer;
10333       llvm::raw_svector_ostream Out(Buffer);
10334       Out << "_ZGV" << Data.ISA << Mask;
10335       if (!VLENVal) {
10336         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10337         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10338         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10339       } else {
10340         Out << VLENVal;
10341       }
10342       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10343         switch (ParamAttr.Kind){
10344         case LinearWithVarStride:
10345           Out << 's' << ParamAttr.StrideOrArg;
10346           break;
10347         case Linear:
10348           Out << 'l';
10349           if (!!ParamAttr.StrideOrArg)
10350             Out << ParamAttr.StrideOrArg;
10351           break;
10352         case Uniform:
10353           Out << 'u';
10354           break;
10355         case Vector:
10356           Out << 'v';
10357           break;
10358         }
10359         if (!!ParamAttr.Alignment)
10360           Out << 'a' << ParamAttr.Alignment;
10361       }
10362       Out << '_' << Fn->getName();
10363       Fn->addFnAttr(Out.str());
10364     }
10365   }
10366 }
10367 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10373 
10374 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10375 ///
10376 /// TODO: Need to implement the behavior for reference marked with a
10377 /// var or no linear modifiers (1.b in the section). For this, we
10378 /// need to extend ParamKindTy to support the linear modifiers.
10379 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10380   QT = QT.getCanonicalType();
10381 
10382   if (QT->isVoidType())
10383     return false;
10384 
10385   if (Kind == ParamKindTy::Uniform)
10386     return false;
10387 
10388   if (Kind == ParamKindTy::Linear)
10389     return false;
10390 
10391   // TODO: Handle linear references with modifiers
10392 
10393   if (Kind == ParamKindTy::LinearWithVarStride)
10394     return false;
10395 
10396   return true;
10397 }
10398 
10399 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10400 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10401   QT = QT.getCanonicalType();
10402   unsigned Size = C.getTypeSize(QT);
10403 
10404   // Only scalars and complex within 16 bytes wide set PVB to true.
10405   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10406     return false;
10407 
10408   if (QT->isFloatingType())
10409     return true;
10410 
10411   if (QT->isIntegerType())
10412     return true;
10413 
10414   if (QT->isPointerType())
10415     return true;
10416 
10417   // TODO: Add support for complex types (section 3.1.2, item 2).
10418 
10419   return false;
10420 }
10421 
10422 /// Computes the lane size (LS) of a return type or of an input parameter,
10423 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10424 /// TODO: Add support for references, section 3.2.1, item 1.
10425 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10426   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10427     QualType PTy = QT.getCanonicalType()->getPointeeType();
10428     if (getAArch64PBV(PTy, C))
10429       return C.getTypeSize(PTy);
10430   }
10431   if (getAArch64PBV(QT, C))
10432     return C.getTypeSize(QT);
10433 
10434   return C.getTypeSize(C.getUIntPtrType());
10435 }
10436 
10437 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10438 // signature of the scalar function, as defined in 3.2.2 of the
10439 // AAVFABI.
10440 static std::tuple<unsigned, unsigned, bool>
10441 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10442   QualType RetType = FD->getReturnType().getCanonicalType();
10443 
10444   ASTContext &C = FD->getASTContext();
10445 
10446   bool OutputBecomesInput = false;
10447 
10448   llvm::SmallVector<unsigned, 8> Sizes;
10449   if (!RetType->isVoidType()) {
10450     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10451     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10452       OutputBecomesInput = true;
10453   }
10454   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10455     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10456     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10457   }
10458 
10459   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10460   // The LS of a function parameter / return value can only be a power
10461   // of 2, starting from 8 bits, up to 128.
10462   assert(std::all_of(Sizes.begin(), Sizes.end(),
10463                      [](unsigned Size) {
10464                        return Size == 8 || Size == 16 || Size == 32 ||
10465                               Size == 64 || Size == 128;
10466                      }) &&
10467          "Invalid size");
10468 
10469   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10470                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10471                          OutputBecomesInput);
10472 }
10473 
10474 /// Mangle the parameter part of the vector function name according to
10475 /// their OpenMP classification. The mangling function is defined in
10476 /// section 3.5 of the AAVFABI.
10477 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10478   SmallString<256> Buffer;
10479   llvm::raw_svector_ostream Out(Buffer);
10480   for (const auto &ParamAttr : ParamAttrs) {
10481     switch (ParamAttr.Kind) {
10482     case LinearWithVarStride:
10483       Out << "ls" << ParamAttr.StrideOrArg;
10484       break;
10485     case Linear:
10486       Out << 'l';
10487       // Don't print the step value if it is not present or if it is
10488       // equal to 1.
10489       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10490         Out << ParamAttr.StrideOrArg;
10491       break;
10492     case Uniform:
10493       Out << 'u';
10494       break;
10495     case Vector:
10496       Out << 'v';
10497       break;
10498     }
10499 
10500     if (!!ParamAttr.Alignment)
10501       Out << 'a' << ParamAttr.Alignment;
10502   }
10503 
10504   return Out.str();
10505 }
10506 
10507 // Function used to add the attribute. The parameter `VLEN` is
10508 // templated to allow the use of "x" when targeting scalable functions
10509 // for SVE.
10510 template <typename T>
10511 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10512                                  char ISA, StringRef ParSeq,
10513                                  StringRef MangledName, bool OutputBecomesInput,
10514                                  llvm::Function *Fn) {
10515   SmallString<256> Buffer;
10516   llvm::raw_svector_ostream Out(Buffer);
10517   Out << Prefix << ISA << LMask << VLEN;
10518   if (OutputBecomesInput)
10519     Out << "v";
10520   Out << ParSeq << "_" << MangledName;
10521   Fn->addFnAttr(Out.str());
10522 }
10523 
10524 // Helper function to generate the Advanced SIMD names depending on
10525 // the value of the NDS when simdlen is not present.
10526 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10527                                       StringRef Prefix, char ISA,
10528                                       StringRef ParSeq, StringRef MangledName,
10529                                       bool OutputBecomesInput,
10530                                       llvm::Function *Fn) {
10531   switch (NDS) {
10532   case 8:
10533     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10534                          OutputBecomesInput, Fn);
10535     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10536                          OutputBecomesInput, Fn);
10537     break;
10538   case 16:
10539     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10540                          OutputBecomesInput, Fn);
10541     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10542                          OutputBecomesInput, Fn);
10543     break;
10544   case 32:
10545     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10546                          OutputBecomesInput, Fn);
10547     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10548                          OutputBecomesInput, Fn);
10549     break;
10550   case 64:
10551   case 128:
10552     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10553                          OutputBecomesInput, Fn);
10554     break;
10555   default:
10556     llvm_unreachable("Scalar type is too wide.");
10557   }
10558 }
10559 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \param UserVLEN Value of the 'simdlen' clause; 0 when absent.
/// \param ISA 'n' for Advanced SIMD, 's' for SVE.
/// \param SLoc Location used for warning diagnostics about invalid simdlen.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: "x" marks a scalable vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10668 
10669 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10670                                               llvm::Function *Fn) {
10671   ASTContext &C = CGM.getContext();
10672   FD = FD->getMostRecentDecl();
10673   // Map params to their positions in function decl.
10674   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10675   if (isa<CXXMethodDecl>(FD))
10676     ParamPositions.try_emplace(FD, 0);
10677   unsigned ParamPos = ParamPositions.size();
10678   for (const ParmVarDecl *P : FD->parameters()) {
10679     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10680     ++ParamPos;
10681   }
10682   while (FD) {
10683     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10684       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10685       // Mark uniform parameters.
10686       for (const Expr *E : Attr->uniforms()) {
10687         E = E->IgnoreParenImpCasts();
10688         unsigned Pos;
10689         if (isa<CXXThisExpr>(E)) {
10690           Pos = ParamPositions[FD];
10691         } else {
10692           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10693                                 ->getCanonicalDecl();
10694           Pos = ParamPositions[PVD];
10695         }
10696         ParamAttrs[Pos].Kind = Uniform;
10697       }
10698       // Get alignment info.
10699       auto NI = Attr->alignments_begin();
10700       for (const Expr *E : Attr->aligneds()) {
10701         E = E->IgnoreParenImpCasts();
10702         unsigned Pos;
10703         QualType ParmTy;
10704         if (isa<CXXThisExpr>(E)) {
10705           Pos = ParamPositions[FD];
10706           ParmTy = E->getType();
10707         } else {
10708           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10709                                 ->getCanonicalDecl();
10710           Pos = ParamPositions[PVD];
10711           ParmTy = PVD->getType();
10712         }
10713         ParamAttrs[Pos].Alignment =
10714             (*NI)
10715                 ? (*NI)->EvaluateKnownConstInt(C)
10716                 : llvm::APSInt::getUnsigned(
10717                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10718                           .getQuantity());
10719         ++NI;
10720       }
10721       // Mark linear parameters.
10722       auto SI = Attr->steps_begin();
10723       auto MI = Attr->modifiers_begin();
10724       for (const Expr *E : Attr->linears()) {
10725         E = E->IgnoreParenImpCasts();
10726         unsigned Pos;
10727         if (isa<CXXThisExpr>(E)) {
10728           Pos = ParamPositions[FD];
10729         } else {
10730           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10731                                 ->getCanonicalDecl();
10732           Pos = ParamPositions[PVD];
10733         }
10734         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10735         ParamAttr.Kind = Linear;
10736         if (*SI) {
10737           Expr::EvalResult Result;
10738           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10739             if (const auto *DRE =
10740                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10741               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10742                 ParamAttr.Kind = LinearWithVarStride;
10743                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10744                     ParamPositions[StridePVD->getCanonicalDecl()]);
10745               }
10746             }
10747           } else {
10748             ParamAttr.StrideOrArg = Result.Val.getInt();
10749           }
10750         }
10751         ++SI;
10752         ++MI;
10753       }
10754       llvm::APSInt VLENVal;
10755       SourceLocation ExprLoc;
10756       const Expr *VLENExpr = Attr->getSimdlen();
10757       if (VLENExpr) {
10758         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10759         ExprLoc = VLENExpr->getExprLoc();
10760       }
10761       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10762       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10763           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10764         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10765       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10766         unsigned VLEN = VLENVal.getExtValue();
10767         StringRef MangledName = Fn->getName();
10768         if (CGM.getTarget().hasFeature("sve"))
10769           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10770                                          MangledName, 's', 128, Fn, ExprLoc);
10771         if (CGM.getTarget().hasFeature("neon"))
10772           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10773                                          MangledName, 'n', 128, Fn, ExprLoc);
10774       }
10775     }
10776     FD = FD->getPreviousDecl();
10777   }
10778 }
10779 
10780 namespace {
10781 /// Cleanup action for doacross support.
10782 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10783 public:
10784   static const int DoacrossFinArgs = 2;
10785 
10786 private:
10787   llvm::FunctionCallee RTLFn;
10788   llvm::Value *Args[DoacrossFinArgs];
10789 
10790 public:
10791   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10792                     ArrayRef<llvm::Value *> CallArgs)
10793       : RTLFn(RTLFn) {
10794     assert(CallArgs.size() == DoacrossFinArgs);
10795     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10796   }
10797   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10798     if (!CGF.HaveInsertPoint())
10799       return;
10800     CGF.EmitRuntimeCall(RTLFn, Args);
10801   }
10802 };
10803 } // namespace
10804 
/// Emits the __kmpc_doacross_init call for an 'ordered(n)' loop nest and
/// pushes a cleanup that emits the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per associated loop.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; 'lower' stays 0 from the null-initialization above.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // The fini call goes through a cleanup so it also runs on EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10876 
10877 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10878                                           const OMPDependClause *C) {
10879   QualType Int64Ty =
10880       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10881   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10882   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10883       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10884   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10885   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10886     const Expr *CounterVal = C->getLoopData(I);
10887     assert(CounterVal);
10888     llvm::Value *CntVal = CGF.EmitScalarConversion(
10889         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10890         CounterVal->getExprLoc());
10891     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10892                           /*Volatile=*/false, Int64Ty);
10893   }
10894   llvm::Value *Args[] = {
10895       emitUpdateLocation(CGF, C->getBeginLoc()),
10896       getThreadID(CGF, C->getBeginLoc()),
10897       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10898   llvm::FunctionCallee RTLFn;
10899   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10900     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10901   } else {
10902     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10903     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10904   }
10905   CGF.EmitRuntimeCall(RTLFn, Args);
10906 }
10907 
10908 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10909                                llvm::FunctionCallee Callee,
10910                                ArrayRef<llvm::Value *> Args) const {
10911   assert(Loc.isValid() && "Outlined function call location must be valid.");
10912   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10913 
10914   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10915     if (Fn->doesNotThrow()) {
10916       CGF.EmitNounwindRuntimeCall(Fn, Args);
10917       return;
10918     }
10919   }
10920   CGF.EmitRuntimeCall(Callee, Args);
10921 }
10922 
/// Emits a call to an outlined OpenMP region function. The base
/// implementation simply forwards to emitCall; subclasses may override.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10928 
10929 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10930   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10931     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10932       HasEmittedDeclareTargetRegion = true;
10933 }
10934 
/// Returns the address of the local copy of \p NativeParam. In the base
/// implementation \p TargetParam is unused and the native parameter's own
/// local address is returned; target-specific runtimes may override this.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10940 
10941 namespace {
10942 /// Cleanup action for allocate support.
10943 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10944 public:
10945   static const int CleanupArgs = 3;
10946 
10947 private:
10948   llvm::FunctionCallee RTLFn;
10949   llvm::Value *Args[CleanupArgs];
10950 
10951 public:
10952   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10953                        ArrayRef<llvm::Value *> CallArgs)
10954       : RTLFn(RTLFn) {
10955     assert(CallArgs.size() == CleanupArgs &&
10956            "Size of arguments does not match.");
10957     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10958   }
10959   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10960     if (!CGF.HaveInsertPoint())
10961       return;
10962     CGF.EmitRuntimeCall(RTLFn, Args);
10963   }
10964 };
10965 } // namespace
10966 
/// Returns the address for a local variable declared with the OpenMP
/// 'allocate' directive: storage is obtained via __kmpc_alloc with the
/// requested allocator, and a cleanup calling __kmpc_free is pushed.
/// Returns Address::invalid() when default (stack) allocation applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the byte size is only known at run time.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Fixed-size type: round the static size up to the alignment.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Pair the allocation with a __kmpc_free cleanup on all exit paths.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
11020 
/// Checks current context and returns true if it matches the context selector.
/// Primary template: any selector-set/selector pair without an explicit
/// specialization is treated as non-matching (returns false).
template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
          OMPDeclareVariantAttr::CtxSelectorType Ctx>
static bool checkContext(const OMPDeclareVariantAttr *A) {
  assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
         Ctx != OMPDeclareVariantAttr::CtxUnknown &&
         "Unknown context selector or context selector set.");
  return false;
}
11030 
11031 /// Checks for implementation={vendor(<vendor>)} context selector.
11032 /// \returns true iff <vendor>="llvm", false otherwise.
11033 template <>
11034 bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
11035                   OMPDeclareVariantAttr::CtxVendor>(
11036     const OMPDeclareVariantAttr *A) {
11037   return llvm::all_of(A->implVendors(),
11038                       [](StringRef S) { return !S.compare_lower("llvm"); });
11039 }
11040 
11041 static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
11042   // If both scores are unknown, choose the very first one.
11043   if (!LHS && !RHS)
11044     return true;
11045   // If only one is known, return this one.
11046   if (LHS && !RHS)
11047     return true;
11048   if (!LHS && RHS)
11049     return false;
11050   llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
11051   llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
11052   return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0;
11053 }
11054 
11055 namespace {
11056 /// Comparator for the priority queue for context selector.
11057 class OMPDeclareVariantAttrComparer
11058     : public std::greater<const OMPDeclareVariantAttr *> {
11059 private:
11060   ASTContext &Ctx;
11061 
11062 public:
11063   OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
11064   bool operator()(const OMPDeclareVariantAttr *LHS,
11065                   const OMPDeclareVariantAttr *RHS) const {
11066     const Expr *LHSExpr = nullptr;
11067     const Expr *RHSExpr = nullptr;
11068     if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
11069       LHSExpr = LHS->getScore();
11070     if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
11071       RHSExpr = RHS->getScore();
11072     return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
11073   }
11074 };
11075 } // anonymous namespace
11076 
/// Finds the variant function that matches current context with its context
/// selector.
/// Returns \p FD itself when no 'declare variant' attribute matches.
static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
                                                     const FunctionDecl *FD) {
  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
    return FD;
  // Iterate through all DeclareVariant attributes and check context selectors.
  auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
                           const OMPDeclareVariantAttr *RHS) {
    const Expr *LHSExpr = nullptr;
    const Expr *RHSExpr = nullptr;
    if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
      LHSExpr = LHS->getScore();
    if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
      RHSExpr = RHS->getScore();
    return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
  };
  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
    const OMPDeclareVariantAttr *SelectedAttr = nullptr;
    // Only implementation={vendor(...)} is handled here; other selector
    // sets/selectors are unreachable at this point.
    switch (A->getCtxSelectorSet()) {
    case OMPDeclareVariantAttr::CtxSetImplementation:
      switch (A->getCtxSelector()) {
      case OMPDeclareVariantAttr::CtxVendor:
        if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
                         OMPDeclareVariantAttr::CtxVendor>(A))
          SelectedAttr = A;
        break;
      case OMPDeclareVariantAttr::CtxUnknown:
        llvm_unreachable(
            "Unknown context selector in implementation selector set.");
      }
      break;
    case OMPDeclareVariantAttr::CtxSetUnknown:
      llvm_unreachable("Unknown context selector set.");
    }
    // If the attribute matches the context, find the attribute with the highest
    // score.
    if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr)))
      TopMostAttr = SelectedAttr;
  }
  if (!TopMostAttr)
    return FD;
  // The variant function reference is expected to be a DeclRefExpr to a
  // FunctionDecl (enforced by Sema when the attribute was built).
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
          ->getDecl());
}
11124 
11125 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11126   const auto *D = cast<FunctionDecl>(GD.getDecl());
11127   // If the original function is defined already, use its definition.
11128   StringRef MangledName = CGM.getMangledName(GD);
11129   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11130   if (Orig && !Orig->isDeclaration())
11131     return false;
11132   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
11133   // Emit original function if it does not have declare variant attribute or the
11134   // context does not match.
11135   if (NewFD == D)
11136     return false;
11137   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11138   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11139     DeferredVariantFunction.erase(D);
11140     return true;
11141   }
11142   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11143   return true;
11144 }
11145 
// CGOpenMPSIMDRuntime stubs: in SIMD-only mode these entry points are never
// expected to be reached — presumably the corresponding constructs are
// rejected or ignored before codegen (TODO(review): confirm against Sema /
// the driver's -fopenmp-simd handling). Each one traps via llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11260 
11261 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11262                                                llvm::Value *NumThreads,
11263                                                SourceLocation Loc) {
11264   llvm_unreachable("Not supported in SIMD-only mode");
11265 }
11266 
11267 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11268                                              OpenMPProcBindClauseKind ProcBind,
11269                                              SourceLocation Loc) {
11270   llvm_unreachable("Not supported in SIMD-only mode");
11271 }
11272 
11273 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11274                                                     const VarDecl *VD,
11275                                                     Address VDAddr,
11276                                                     SourceLocation Loc) {
11277   llvm_unreachable("Not supported in SIMD-only mode");
11278 }
11279 
11280 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11281     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11282     CodeGenFunction *CGF) {
11283   llvm_unreachable("Not supported in SIMD-only mode");
11284 }
11285 
11286 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11287     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11288   llvm_unreachable("Not supported in SIMD-only mode");
11289 }
11290 
11291 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11292                                     ArrayRef<const Expr *> Vars,
11293                                     SourceLocation Loc) {
11294   llvm_unreachable("Not supported in SIMD-only mode");
11295 }
11296 
11297 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11298                                        const OMPExecutableDirective &D,
11299                                        llvm::Function *TaskFunction,
11300                                        QualType SharedsTy, Address Shareds,
11301                                        const Expr *IfCond,
11302                                        const OMPTaskDataTy &Data) {
11303   llvm_unreachable("Not supported in SIMD-only mode");
11304 }
11305 
11306 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11307     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11308     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11309     const Expr *IfCond, const OMPTaskDataTy &Data) {
11310   llvm_unreachable("Not supported in SIMD-only mode");
11311 }
11312 
/// The one construct the SIMD-only runtime does handle: reductions, but only
/// the "simple" form that needs no runtime-library calls (enforced by the
/// assert on Options.SimpleReduction). Delegates to the base implementation,
/// which performs the simple reduction codegen.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11321 
/// SIMD-only stub: task reductions require libomp; unreachable in SIMD-only
/// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11327 
/// SIMD-only stub: task-reduction fixups require libomp; unreachable in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11334 
/// SIMD-only stub: looking up a task-reduction item is a runtime-library
/// operation; unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11341 
/// SIMD-only stub: 'taskwait' lowers to a runtime call; unreachable in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11346 
/// SIMD-only stub: 'cancellation point' requires libomp cancellation support;
/// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11352 
/// SIMD-only stub: 'cancel' requires libomp cancellation support; unreachable
/// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11358 
/// SIMD-only stub: no target/offload outlining happens in SIMD-only mode;
/// reaching this is a codegen bug.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11365 
/// SIMD-only stub: launching a 'target' region requires the offload runtime;
/// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11375 
/// SIMD-only stub: device-side function emission never happens in SIMD-only
/// mode; unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11379 
/// SIMD-only stub: device-side global-variable emission never happens in
/// SIMD-only mode; unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11383 
/// Unlike the other target hooks, this one can legitimately be called in
/// SIMD-only mode; it always answers false — presumably "not handled here,
/// emit the global normally" (see CGOpenMPRuntime::emitTargetGlobal for the
/// contract — TODO(review): confirm).
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11387 
/// SIMD-only stub: 'teams' lowers to a runtime fork call; unreachable in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11395 
/// SIMD-only stub: 'num_teams'/'thread_limit' clauses are runtime-library
/// calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11402 
/// SIMD-only stub: 'target data' mapping requires the offload runtime;
/// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11408 
/// SIMD-only stub: standalone data-mapping directives ('target enter/exit
/// data', 'target update') require the offload runtime; unreachable in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11414 
/// SIMD-only stub: doacross loop initialization is a runtime-library call;
/// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11420 
/// SIMD-only stub: 'ordered depend(...)' synchronization is a runtime-library
/// call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11425 
/// SIMD-only stub: parameter translation is only needed for outlined
/// target-device functions; unreachable in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11431 
/// SIMD-only stub: counterpart of translateParameter for obtaining the
/// translated parameter's address; unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11438