xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp (revision 6c37d6032ed296370371f8c9f79068ba7c6a54df)
1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "clang/AST/DeclOpenMP.h"
21 using namespace clang;
22 using namespace CodeGen;
23 
24 namespace {
25 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
26 /// for captured expressions.
27 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
28   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
29     for (const auto *C : S.clauses()) {
30       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
31         if (const auto *PreInit =
32                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
33           for (const auto *I : PreInit->decls()) {
34             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
35               CGF.EmitVarDecl(cast<VarDecl>(*I));
36             } else {
37               CodeGenFunction::AutoVarEmission Emission =
38                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
39               CGF.EmitAutoVarCleanups(Emission);
40             }
41           }
42         }
43       }
44     }
45   }
46   CodeGenFunction::OMPPrivateScope InlinedShareds;
47 
48   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
49     return CGF.LambdaCaptureFields.lookup(VD) ||
50            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
51            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
52   }
53 
54 public:
55   OMPLexicalScope(
56       CodeGenFunction &CGF, const OMPExecutableDirective &S,
57       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
58       const bool EmitPreInitStmt = true)
59       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
60         InlinedShareds(CGF) {
61     if (EmitPreInitStmt)
62       emitPreInitStmt(CGF, S);
63     if (!CapturedRegion.hasValue())
64       return;
65     assert(S.hasAssociatedStmt() &&
66            "Expected associated statement for inlined directive.");
67     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
68     for (const auto &C : CS->captures()) {
69       if (C.capturesVariable() || C.capturesVariableByCopy()) {
70         auto *VD = C.getCapturedVar();
71         assert(VD == VD->getCanonicalDecl() &&
72                "Canonical decl must be captured.");
73         DeclRefExpr DRE(
74             CGF.getContext(), const_cast<VarDecl *>(VD),
75             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
76                                        InlinedShareds.isGlobalVarCaptured(VD)),
77             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
78         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
79           return CGF.EmitLValue(&DRE).getAddress();
80         });
81       }
82     }
83     (void)InlinedShareds.Privatize();
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
100                         EmitPreInitStmt(S)) {}
101 };
102 
103 /// Lexical scope for OpenMP teams construct, that handles correct codegen
104 /// for captured expressions.
105 class OMPTeamsScope final : public OMPLexicalScope {
106   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
107     OpenMPDirectiveKind Kind = S.getDirectiveKind();
108     return !isOpenMPTargetExecutionDirective(Kind) &&
109            isOpenMPTeamsDirective(Kind);
110   }
111 
112 public:
113   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
114       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
115                         EmitPreInitStmt(S)) {}
116 };
117 
118 /// Private scope for OpenMP loop-based directives, that supports capturing
119 /// of used expression from loop statement.
120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
121   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
122     CodeGenFunction::OMPMapVars PreCondVars;
123     for (const auto *E : S.counters()) {
124       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
125       (void)PreCondVars.setVarAddr(
126           CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
127     }
128     (void)PreCondVars.apply(CGF);
129     if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
130       for (const auto *I : PreInits->decls())
131         CGF.EmitVarDecl(cast<VarDecl>(*I));
132     }
133     PreCondVars.restore(CGF);
134   }
135 
136 public:
137   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
138       : CodeGenFunction::RunCleanupsScope(CGF) {
139     emitPreInitStmt(CGF, S);
140   }
141 };
142 
143 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
144   CodeGenFunction::OMPPrivateScope InlinedShareds;
145 
146   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
147     return CGF.LambdaCaptureFields.lookup(VD) ||
148            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
149            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
150             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
151   }
152 
153 public:
154   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
155       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
156         InlinedShareds(CGF) {
157     for (const auto *C : S.clauses()) {
158       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
159         if (const auto *PreInit =
160                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
161           for (const auto *I : PreInit->decls()) {
162             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
163               CGF.EmitVarDecl(cast<VarDecl>(*I));
164             } else {
165               CodeGenFunction::AutoVarEmission Emission =
166                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
167               CGF.EmitAutoVarCleanups(Emission);
168             }
169           }
170         }
171       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
172         for (const Expr *E : UDP->varlists()) {
173           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
174           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
175             CGF.EmitVarDecl(*OED);
176         }
177       }
178     }
179     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
180       CGF.EmitOMPPrivateClause(S, InlinedShareds);
181     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
182       if (const Expr *E = TG->getReductionRef())
183         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
184     }
185     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
186     while (CS) {
187       for (auto &C : CS->captures()) {
188         if (C.capturesVariable() || C.capturesVariableByCopy()) {
189           auto *VD = C.getCapturedVar();
190           assert(VD == VD->getCanonicalDecl() &&
191                  "Canonical decl must be captured.");
192           DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
193                           isCapturedVar(CGF, VD) ||
194                               (CGF.CapturedStmtInfo &&
195                                InlinedShareds.isGlobalVarCaptured(VD)),
196                           VD->getType().getNonReferenceType(), VK_LValue,
197                           C.getLocation());
198           InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
199             return CGF.EmitLValue(&DRE).getAddress();
200           });
201         }
202       }
203       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
204     }
205     (void)InlinedShareds.Privatize();
206   }
207 };
208 
209 } // namespace
210 
211 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
212                                          const OMPExecutableDirective &S,
213                                          const RegionCodeGenTy &CodeGen);
214 
215 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
216   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
217     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
218       OrigVD = OrigVD->getCanonicalDecl();
219       bool IsCaptured =
220           LambdaCaptureFields.lookup(OrigVD) ||
221           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
222           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
223       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
224                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
225       return EmitLValue(&DRE);
226     }
227   }
228   return EmitLValue(E);
229 }
230 
231 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
232   ASTContext &C = getContext();
233   llvm::Value *Size = nullptr;
234   auto SizeInChars = C.getTypeSizeInChars(Ty);
235   if (SizeInChars.isZero()) {
236     // getTypeSizeInChars() returns 0 for a VLA.
237     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
238       VlaSizePair VlaSize = getVLASize(VAT);
239       Ty = VlaSize.Type;
240       Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
241                   : VlaSize.NumElts;
242     }
243     SizeInChars = C.getTypeSizeInChars(Ty);
244     if (SizeInChars.isZero())
245       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
246     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
247   }
248   return CGM.getSize(SizeInChars);
249 }
250 
251 void CodeGenFunction::GenerateOpenMPCapturedVars(
252     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
253   const RecordDecl *RD = S.getCapturedRecordDecl();
254   auto CurField = RD->field_begin();
255   auto CurCap = S.captures().begin();
256   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
257                                                  E = S.capture_init_end();
258        I != E; ++I, ++CurField, ++CurCap) {
259     if (CurField->hasCapturedVLAType()) {
260       const VariableArrayType *VAT = CurField->getCapturedVLAType();
261       llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
262       CapturedVars.push_back(Val);
263     } else if (CurCap->capturesThis()) {
264       CapturedVars.push_back(CXXThisValue);
265     } else if (CurCap->capturesVariableByCopy()) {
266       llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
267 
268       // If the field is not a pointer, we need to save the actual value
269       // and load it as a void pointer.
270       if (!CurField->getType()->isAnyPointerType()) {
271         ASTContext &Ctx = getContext();
272         Address DstAddr = CreateMemTemp(
273             Ctx.getUIntPtrType(),
274             Twine(CurCap->getCapturedVar()->getName(), ".casted"));
275         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
276 
277         llvm::Value *SrcAddrVal = EmitScalarConversion(
278             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
279             Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
280         LValue SrcLV =
281             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
282 
283         // Store the value using the source type pointer.
284         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
285 
286         // Load the value using the destination type pointer.
287         CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
288       }
289       CapturedVars.push_back(CV);
290     } else {
291       assert(CurCap->capturesVariable() && "Expected capture by reference.");
292       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
293     }
294   }
295 }
296 
297 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
298                                     QualType DstType, StringRef Name,
299                                     LValue AddrLV) {
300   ASTContext &Ctx = CGF.getContext();
301 
302   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
303       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
304       Ctx.getPointerType(DstType), Loc);
305   Address TmpAddr =
306       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
307           .getAddress();
308   return TmpAddr;
309 }
310 
311 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
312   if (T->isLValueReferenceType())
313     return C.getLValueReferenceType(
314         getCanonicalParamType(C, T.getNonReferenceType()),
315         /*SpelledAsLValue=*/false);
316   if (T->isPointerType())
317     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
318   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
319     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
320       return getCanonicalParamType(C, VLA->getElementType());
321     if (!A->isVariablyModifiedType())
322       return C.getCanonicalType(T);
323   }
324   return C.getCanonicalParamType(T);
325 }
326 
327 namespace {
328   /// Contains required data for proper outlined function codegen.
329   struct FunctionOptions {
330     /// Captured statement for which the function is generated.
331     const CapturedStmt *S = nullptr;
332     /// true if cast to/from  UIntPtr is required for variables captured by
333     /// value.
334     const bool UIntPtrCastRequired = true;
335     /// true if only casted arguments must be registered as local args or VLA
336     /// sizes.
337     const bool RegisterCastedArgsOnly = false;
338     /// Name of the generated function.
339     const StringRef FunctionName;
340     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
341                              bool RegisterCastedArgsOnly,
342                              StringRef FunctionName)
343         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
344           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
345           FunctionName(FunctionName) {}
346   };
347 }
348 
349 static llvm::Function *emitOutlinedFunctionPrologue(
350     CodeGenFunction &CGF, FunctionArgList &Args,
351     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
352         &LocalAddrs,
353     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
354         &VLASizes,
355     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
356   const CapturedDecl *CD = FO.S->getCapturedDecl();
357   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
358   assert(CD->hasBody() && "missing CapturedDecl body");
359 
360   CXXThisValue = nullptr;
361   // Build the argument list.
362   CodeGenModule &CGM = CGF.CGM;
363   ASTContext &Ctx = CGM.getContext();
364   FunctionArgList TargetArgs;
365   Args.append(CD->param_begin(),
366               std::next(CD->param_begin(), CD->getContextParamPosition()));
367   TargetArgs.append(
368       CD->param_begin(),
369       std::next(CD->param_begin(), CD->getContextParamPosition()));
370   auto I = FO.S->captures().begin();
371   FunctionDecl *DebugFunctionDecl = nullptr;
372   if (!FO.UIntPtrCastRequired) {
373     FunctionProtoType::ExtProtoInfo EPI;
374     QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
375     DebugFunctionDecl = FunctionDecl::Create(
376         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
377         SourceLocation(), DeclarationName(), FunctionTy,
378         Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
379         /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
380   }
381   for (const FieldDecl *FD : RD->fields()) {
382     QualType ArgType = FD->getType();
383     IdentifierInfo *II = nullptr;
384     VarDecl *CapVar = nullptr;
385 
386     // If this is a capture by copy and the type is not a pointer, the outlined
387     // function argument type should be uintptr and the value properly casted to
388     // uintptr. This is necessary given that the runtime library is only able to
389     // deal with pointers. We can pass in the same way the VLA type sizes to the
390     // outlined function.
391     if (FO.UIntPtrCastRequired &&
392         ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
393          I->capturesVariableArrayType()))
394       ArgType = Ctx.getUIntPtrType();
395 
396     if (I->capturesVariable() || I->capturesVariableByCopy()) {
397       CapVar = I->getCapturedVar();
398       II = CapVar->getIdentifier();
399     } else if (I->capturesThis()) {
400       II = &Ctx.Idents.get("this");
401     } else {
402       assert(I->capturesVariableArrayType());
403       II = &Ctx.Idents.get("vla");
404     }
405     if (ArgType->isVariablyModifiedType())
406       ArgType = getCanonicalParamType(Ctx, ArgType);
407     VarDecl *Arg;
408     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
409       Arg = ParmVarDecl::Create(
410           Ctx, DebugFunctionDecl,
411           CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
412           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
413           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
414     } else {
415       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
416                                       II, ArgType, ImplicitParamDecl::Other);
417     }
418     Args.emplace_back(Arg);
419     // Do not cast arguments if we emit function with non-original types.
420     TargetArgs.emplace_back(
421         FO.UIntPtrCastRequired
422             ? Arg
423             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
424     ++I;
425   }
426   Args.append(
427       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
428       CD->param_end());
429   TargetArgs.append(
430       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
431       CD->param_end());
432 
433   // Create the function declaration.
434   const CGFunctionInfo &FuncInfo =
435       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
436   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
437 
438   auto *F =
439       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
440                              FO.FunctionName, &CGM.getModule());
441   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
442   if (CD->isNothrow())
443     F->setDoesNotThrow();
444   F->setDoesNotRecurse();
445 
446   // Generate the function.
447   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
448                     FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
449   unsigned Cnt = CD->getContextParamPosition();
450   I = FO.S->captures().begin();
451   for (const FieldDecl *FD : RD->fields()) {
452     // Do not map arguments if we emit function with non-original types.
453     Address LocalAddr(Address::invalid());
454     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
455       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
456                                                              TargetArgs[Cnt]);
457     } else {
458       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
459     }
460     // If we are capturing a pointer by copy we don't need to do anything, just
461     // use the value that we get from the arguments.
462     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
463       const VarDecl *CurVD = I->getCapturedVar();
464       if (!FO.RegisterCastedArgsOnly)
465         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
466       ++Cnt;
467       ++I;
468       continue;
469     }
470 
471     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
472                                         AlignmentSource::Decl);
473     if (FD->hasCapturedVLAType()) {
474       if (FO.UIntPtrCastRequired) {
475         ArgLVal = CGF.MakeAddrLValue(
476             castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
477                                  Args[Cnt]->getName(), ArgLVal),
478             FD->getType(), AlignmentSource::Decl);
479       }
480       llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
481       const VariableArrayType *VAT = FD->getCapturedVLAType();
482       VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
483     } else if (I->capturesVariable()) {
484       const VarDecl *Var = I->getCapturedVar();
485       QualType VarTy = Var->getType();
486       Address ArgAddr = ArgLVal.getAddress();
487       if (ArgLVal.getType()->isLValueReferenceType()) {
488         ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
489       } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
490         assert(ArgLVal.getType()->isPointerType());
491         ArgAddr = CGF.EmitLoadOfPointer(
492             ArgAddr, ArgLVal.getType()->castAs<PointerType>());
493       }
494       if (!FO.RegisterCastedArgsOnly) {
495         LocalAddrs.insert(
496             {Args[Cnt],
497              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
498       }
499     } else if (I->capturesVariableByCopy()) {
500       assert(!FD->getType()->isAnyPointerType() &&
501              "Not expecting a captured pointer.");
502       const VarDecl *Var = I->getCapturedVar();
503       LocalAddrs.insert({Args[Cnt],
504                          {Var, FO.UIntPtrCastRequired
505                                    ? castValueFromUintptr(
506                                          CGF, I->getLocation(), FD->getType(),
507                                          Args[Cnt]->getName(), ArgLVal)
508                                    : ArgLVal.getAddress()}});
509     } else {
510       // If 'this' is captured, load it into CXXThisValue.
511       assert(I->capturesThis());
512       CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
513       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
514     }
515     ++Cnt;
516     ++I;
517   }
518 
519   return F;
520 }
521 
522 llvm::Function *
523 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
524   assert(
525       CapturedStmtInfo &&
526       "CapturedStmtInfo should be set when generating the captured function");
527   const CapturedDecl *CD = S.getCapturedDecl();
528   // Build the argument list.
529   bool NeedWrapperFunction =
530       getDebugInfo() &&
531       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
532   FunctionArgList Args;
533   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
534   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
535   SmallString<256> Buffer;
536   llvm::raw_svector_ostream Out(Buffer);
537   Out << CapturedStmtInfo->getHelperName();
538   if (NeedWrapperFunction)
539     Out << "_debug__";
540   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
541                      Out.str());
542   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
543                                                    VLASizes, CXXThisValue, FO);
544   CodeGenFunction::OMPPrivateScope LocalScope(*this);
545   for (const auto &LocalAddrPair : LocalAddrs) {
546     if (LocalAddrPair.second.first) {
547       LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
548         return LocalAddrPair.second.second;
549       });
550     }
551   }
552   (void)LocalScope.Privatize();
553   for (const auto &VLASizePair : VLASizes)
554     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
555   PGO.assignRegionCounters(GlobalDecl(CD), F);
556   CapturedStmtInfo->EmitBody(*this, CD->getBody());
557   (void)LocalScope.ForceCleanup();
558   FinishFunction(CD->getBodyRBrace());
559   if (!NeedWrapperFunction)
560     return F;
561 
562   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
563                             /*RegisterCastedArgsOnly=*/true,
564                             CapturedStmtInfo->getHelperName());
565   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
566   WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
567   Args.clear();
568   LocalAddrs.clear();
569   VLASizes.clear();
570   llvm::Function *WrapperF =
571       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
572                                    WrapperCGF.CXXThisValue, WrapperFO);
573   llvm::SmallVector<llvm::Value *, 4> CallArgs;
574   for (const auto *Arg : Args) {
575     llvm::Value *CallArg;
576     auto I = LocalAddrs.find(Arg);
577     if (I != LocalAddrs.end()) {
578       LValue LV = WrapperCGF.MakeAddrLValue(
579           I->second.second,
580           I->second.first ? I->second.first->getType() : Arg->getType(),
581           AlignmentSource::Decl);
582       CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
583     } else {
584       auto EI = VLASizes.find(Arg);
585       if (EI != VLASizes.end()) {
586         CallArg = EI->second.second;
587       } else {
588         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
589                                               Arg->getType(),
590                                               AlignmentSource::Decl);
591         CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
592       }
593     }
594     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
595   }
596   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
597                                                   F, CallArgs);
598   WrapperCGF.FinishFunction();
599   return WrapperF;
600 }
601 
602 //===----------------------------------------------------------------------===//
603 //                              OpenMP Directive Emission
604 //===----------------------------------------------------------------------===//
605 void CodeGenFunction::EmitOMPAggregateAssign(
606     Address DestAddr, Address SrcAddr, QualType OriginalType,
607     const llvm::function_ref<void(Address, Address)> CopyGen) {
608   // Perform element-by-element initialization.
609   QualType ElementTy;
610 
611   // Drill down to the base element type on both arrays.
612   const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
613   llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
614   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
615 
616   llvm::Value *SrcBegin = SrcAddr.getPointer();
617   llvm::Value *DestBegin = DestAddr.getPointer();
618   // Cast from pointer to array type to pointer to single element.
619   llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
620   // The basic structure here is a while-do loop.
621   llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
622   llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
623   llvm::Value *IsEmpty =
624       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
625   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
626 
627   // Enter the loop body, making that address the current address.
628   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
629   EmitBlock(BodyBB);
630 
631   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
632 
633   llvm::PHINode *SrcElementPHI =
634     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
635   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
636   Address SrcElementCurrent =
637       Address(SrcElementPHI,
638               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
639 
640   llvm::PHINode *DestElementPHI =
641     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
642   DestElementPHI->addIncoming(DestBegin, EntryBB);
643   Address DestElementCurrent =
644     Address(DestElementPHI,
645             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
646 
647   // Emit copy.
648   CopyGen(DestElementCurrent, SrcElementCurrent);
649 
650   // Shift the address forward by one element.
651   llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
652       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
653   llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
654       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
655   // Check whether we've reached the end.
656   llvm::Value *Done =
657       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
658   Builder.CreateCondBr(Done, DoneBB, BodyBB);
659   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
660   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
661 
662   // Done.
663   EmitBlock(DoneBB, /*IsFinished=*/true);
664 }
665 
666 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
667                                   Address SrcAddr, const VarDecl *DestVD,
668                                   const VarDecl *SrcVD, const Expr *Copy) {
669   if (OriginalType->isArrayType()) {
670     const auto *BO = dyn_cast<BinaryOperator>(Copy);
671     if (BO && BO->getOpcode() == BO_Assign) {
672       // Perform simple memcpy for simple copying.
673       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
674       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
675       EmitAggregateAssign(Dest, Src, OriginalType);
676     } else {
677       // For arrays with complex element types perform element by element
678       // copying.
679       EmitOMPAggregateAssign(
680           DestAddr, SrcAddr, OriginalType,
681           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
682             // Working with the single array element, so have to remap
683             // destination and source variables to corresponding array
684             // elements.
685             CodeGenFunction::OMPPrivateScope Remap(*this);
686             Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
687             Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
688             (void)Remap.Privatize();
689             EmitIgnoredExpr(Copy);
690           });
691     }
692   } else {
693     // Remap pseudo source variable to private copy.
694     CodeGenFunction::OMPPrivateScope Remap(*this);
695     Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
696     Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
697     (void)Remap.Privatize();
698     // Emit copying of the whole variable.
699     EmitIgnoredExpr(Copy);
700   }
701 }
702 
703 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
704                                                 OMPPrivateScope &PrivateScope) {
705   if (!HaveInsertPoint())
706     return false;
707   bool DeviceConstTarget =
708       getLangOpts().OpenMPIsDevice &&
709       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
710   bool FirstprivateIsLastprivate = false;
711   llvm::DenseSet<const VarDecl *> Lastprivates;
712   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
713     for (const auto *D : C->varlists())
714       Lastprivates.insert(
715           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
716   }
717   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
718   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
719   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
720   // Force emission of the firstprivate copy if the directive does not emit
721   // outlined function, like omp for, omp simd, omp distribute etc.
722   bool MustEmitFirstprivateCopy =
723       CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
724   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
725     auto IRef = C->varlist_begin();
726     auto InitsRef = C->inits().begin();
727     for (const Expr *IInit : C->private_copies()) {
728       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
729       bool ThisFirstprivateIsLastprivate =
730           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
731       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
732       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
733       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
734           !FD->getType()->isReferenceType() &&
735           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
736         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
737         ++IRef;
738         ++InitsRef;
739         continue;
740       }
741       // Do not emit copy for firstprivate constant variables in target regions,
742       // captured by reference.
743       if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
744           FD && FD->getType()->isReferenceType() &&
745           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
746         (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
747                                                                     OrigVD);
748         ++IRef;
749         ++InitsRef;
750         continue;
751       }
752       FirstprivateIsLastprivate =
753           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
754       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
755         const auto *VDInit =
756             cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
757         bool IsRegistered;
758         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
759                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
760                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
761         LValue OriginalLVal;
762         if (!FD) {
763           // Check if the firstprivate variable is just a constant value.
764           ConstantEmission CE = tryEmitAsConstant(&DRE);
765           if (CE && !CE.isReference()) {
766             // Constant value, no need to create a copy.
767             ++IRef;
768             ++InitsRef;
769             continue;
770           }
771           if (CE && CE.isReference()) {
772             OriginalLVal = CE.getReferenceLValue(*this, &DRE);
773           } else {
774             assert(!CE && "Expected non-constant firstprivate.");
775             OriginalLVal = EmitLValue(&DRE);
776           }
777         } else {
778           OriginalLVal = EmitLValue(&DRE);
779         }
780         QualType Type = VD->getType();
781         if (Type->isArrayType()) {
782           // Emit VarDecl with copy init for arrays.
783           // Get the address of the original variable captured in current
784           // captured region.
785           IsRegistered = PrivateScope.addPrivate(
786               OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
787                 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
788                 const Expr *Init = VD->getInit();
789                 if (!isa<CXXConstructExpr>(Init) ||
790                     isTrivialInitializer(Init)) {
791                   // Perform simple memcpy.
792                   LValue Dest =
793                       MakeAddrLValue(Emission.getAllocatedAddress(), Type);
794                   EmitAggregateAssign(Dest, OriginalLVal, Type);
795                 } else {
796                   EmitOMPAggregateAssign(
797                       Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
798                       Type,
799                       [this, VDInit, Init](Address DestElement,
800                                            Address SrcElement) {
801                         // Clean up any temporaries needed by the
802                         // initialization.
803                         RunCleanupsScope InitScope(*this);
804                         // Emit initialization for single element.
805                         setAddrOfLocalVar(VDInit, SrcElement);
806                         EmitAnyExprToMem(Init, DestElement,
807                                          Init->getType().getQualifiers(),
808                                          /*IsInitializer*/ false);
809                         LocalDeclMap.erase(VDInit);
810                       });
811                 }
812                 EmitAutoVarCleanups(Emission);
813                 return Emission.getAllocatedAddress();
814               });
815         } else {
816           Address OriginalAddr = OriginalLVal.getAddress();
817           IsRegistered = PrivateScope.addPrivate(
818               OrigVD, [this, VDInit, OriginalAddr, VD]() {
819                 // Emit private VarDecl with copy init.
820                 // Remap temp VDInit variable to the address of the original
821                 // variable (for proper handling of captured global variables).
822                 setAddrOfLocalVar(VDInit, OriginalAddr);
823                 EmitDecl(*VD);
824                 LocalDeclMap.erase(VDInit);
825                 return GetAddrOfLocalVar(VD);
826               });
827         }
828         assert(IsRegistered &&
829                "firstprivate var already registered as private");
830         // Silence the warning about unused variable.
831         (void)IsRegistered;
832       }
833       ++IRef;
834       ++InitsRef;
835     }
836   }
837   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
838 }
839 
840 void CodeGenFunction::EmitOMPPrivateClause(
841     const OMPExecutableDirective &D,
842     CodeGenFunction::OMPPrivateScope &PrivateScope) {
843   if (!HaveInsertPoint())
844     return;
845   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
846   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
847     auto IRef = C->varlist_begin();
848     for (const Expr *IInit : C->private_copies()) {
849       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
850       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
851         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
852         bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
853           // Emit private VarDecl with copy init.
854           EmitDecl(*VD);
855           return GetAddrOfLocalVar(VD);
856         });
857         assert(IsRegistered && "private var already registered as private");
858         // Silence the warning about unused variable.
859         (void)IsRegistered;
860       }
861       ++IRef;
862     }
863   }
864 }
865 
866 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
867   if (!HaveInsertPoint())
868     return false;
869   // threadprivate_var1 = master_threadprivate_var1;
870   // operator=(threadprivate_var2, master_threadprivate_var2);
871   // ...
872   // __kmpc_barrier(&loc, global_tid);
873   llvm::DenseSet<const VarDecl *> CopiedVars;
874   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
875   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
876     auto IRef = C->varlist_begin();
877     auto ISrcRef = C->source_exprs().begin();
878     auto IDestRef = C->destination_exprs().begin();
879     for (const Expr *AssignOp : C->assignment_ops()) {
880       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
881       QualType Type = VD->getType();
882       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
883         // Get the address of the master variable. If we are emitting code with
884         // TLS support, the address is passed from the master as field in the
885         // captured declaration.
886         Address MasterAddr = Address::invalid();
887         if (getLangOpts().OpenMPUseTLS &&
888             getContext().getTargetInfo().isTLSSupported()) {
889           assert(CapturedStmtInfo->lookup(VD) &&
890                  "Copyin threadprivates should have been captured!");
891           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
892                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
893           MasterAddr = EmitLValue(&DRE).getAddress();
894           LocalDeclMap.erase(VD);
895         } else {
896           MasterAddr =
897             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
898                                         : CGM.GetAddrOfGlobal(VD),
899                     getContext().getDeclAlign(VD));
900         }
901         // Get the address of the threadprivate variable.
902         Address PrivateAddr = EmitLValue(*IRef).getAddress();
903         if (CopiedVars.size() == 1) {
904           // At first check if current thread is a master thread. If it is, no
905           // need to copy data.
906           CopyBegin = createBasicBlock("copyin.not.master");
907           CopyEnd = createBasicBlock("copyin.not.master.end");
908           Builder.CreateCondBr(
909               Builder.CreateICmpNE(
910                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
911                   Builder.CreatePtrToInt(PrivateAddr.getPointer(),
912                                          CGM.IntPtrTy)),
913               CopyBegin, CopyEnd);
914           EmitBlock(CopyBegin);
915         }
916         const auto *SrcVD =
917             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
918         const auto *DestVD =
919             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
920         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
921       }
922       ++IRef;
923       ++ISrcRef;
924       ++IDestRef;
925     }
926   }
927   if (CopyEnd) {
928     // Exit out of copying procedure for non-master thread.
929     EmitBlock(CopyEnd, /*IsFinished=*/true);
930     return true;
931   }
932   return false;
933 }
934 
935 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
936     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
937   if (!HaveInsertPoint())
938     return false;
939   bool HasAtLeastOneLastprivate = false;
940   llvm::DenseSet<const VarDecl *> SIMDLCVs;
941   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
942     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
943     for (const Expr *C : LoopDirective->counters()) {
944       SIMDLCVs.insert(
945           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
946     }
947   }
948   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
949   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
950     HasAtLeastOneLastprivate = true;
951     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
952         !getLangOpts().OpenMPSimd)
953       break;
954     auto IRef = C->varlist_begin();
955     auto IDestRef = C->destination_exprs().begin();
956     for (const Expr *IInit : C->private_copies()) {
957       // Keep the address of the original variable for future update at the end
958       // of the loop.
959       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
960       // Taskloops do not require additional initialization, it is done in
961       // runtime support library.
962       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
963         const auto *DestVD =
964             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
965         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
966           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
967                           /*RefersToEnclosingVariableOrCapture=*/
968                               CapturedStmtInfo->lookup(OrigVD) != nullptr,
969                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
970           return EmitLValue(&DRE).getAddress();
971         });
972         // Check if the variable is also a firstprivate: in this case IInit is
973         // not generated. Initialization of this variable will happen in codegen
974         // for 'firstprivate' clause.
975         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
976           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
977           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
978             // Emit private VarDecl with copy init.
979             EmitDecl(*VD);
980             return GetAddrOfLocalVar(VD);
981           });
982           assert(IsRegistered &&
983                  "lastprivate var already registered as private");
984           (void)IsRegistered;
985         }
986       }
987       ++IRef;
988       ++IDestRef;
989     }
990   }
991   return HasAtLeastOneLastprivate;
992 }
993 
994 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
995     const OMPExecutableDirective &D, bool NoFinals,
996     llvm::Value *IsLastIterCond) {
997   if (!HaveInsertPoint())
998     return;
999   // Emit following code:
1000   // if (<IsLastIterCond>) {
1001   //   orig_var1 = private_orig_var1;
1002   //   ...
1003   //   orig_varn = private_orig_varn;
1004   // }
1005   llvm::BasicBlock *ThenBB = nullptr;
1006   llvm::BasicBlock *DoneBB = nullptr;
1007   if (IsLastIterCond) {
1008     ThenBB = createBasicBlock(".omp.lastprivate.then");
1009     DoneBB = createBasicBlock(".omp.lastprivate.done");
1010     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1011     EmitBlock(ThenBB);
1012   }
1013   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1014   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1015   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1016     auto IC = LoopDirective->counters().begin();
1017     for (const Expr *F : LoopDirective->finals()) {
1018       const auto *D =
1019           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1020       if (NoFinals)
1021         AlreadyEmittedVars.insert(D);
1022       else
1023         LoopCountersAndUpdates[D] = F;
1024       ++IC;
1025     }
1026   }
1027   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1028     auto IRef = C->varlist_begin();
1029     auto ISrcRef = C->source_exprs().begin();
1030     auto IDestRef = C->destination_exprs().begin();
1031     for (const Expr *AssignOp : C->assignment_ops()) {
1032       const auto *PrivateVD =
1033           cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1034       QualType Type = PrivateVD->getType();
1035       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1036       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1037         // If lastprivate variable is a loop control variable for loop-based
1038         // directive, update its value before copyin back to original
1039         // variable.
1040         if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1041           EmitIgnoredExpr(FinalExpr);
1042         const auto *SrcVD =
1043             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1044         const auto *DestVD =
1045             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1046         // Get the address of the original variable.
1047         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1048         // Get the address of the private variable.
1049         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1050         if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1051           PrivateAddr =
1052               Address(Builder.CreateLoad(PrivateAddr),
1053                       getNaturalTypeAlignment(RefTy->getPointeeType()));
1054         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1055       }
1056       ++IRef;
1057       ++ISrcRef;
1058       ++IDestRef;
1059     }
1060     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1061       EmitIgnoredExpr(PostUpdate);
1062   }
1063   if (IsLastIterCond)
1064     EmitBlock(DoneBB, /*IsFinished=*/true);
1065 }
1066 
1067 void CodeGenFunction::EmitOMPReductionClauseInit(
1068     const OMPExecutableDirective &D,
1069     CodeGenFunction::OMPPrivateScope &PrivateScope) {
1070   if (!HaveInsertPoint())
1071     return;
1072   SmallVector<const Expr *, 4> Shareds;
1073   SmallVector<const Expr *, 4> Privates;
1074   SmallVector<const Expr *, 4> ReductionOps;
1075   SmallVector<const Expr *, 4> LHSs;
1076   SmallVector<const Expr *, 4> RHSs;
1077   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1078     auto IPriv = C->privates().begin();
1079     auto IRed = C->reduction_ops().begin();
1080     auto ILHS = C->lhs_exprs().begin();
1081     auto IRHS = C->rhs_exprs().begin();
1082     for (const Expr *Ref : C->varlists()) {
1083       Shareds.emplace_back(Ref);
1084       Privates.emplace_back(*IPriv);
1085       ReductionOps.emplace_back(*IRed);
1086       LHSs.emplace_back(*ILHS);
1087       RHSs.emplace_back(*IRHS);
1088       std::advance(IPriv, 1);
1089       std::advance(IRed, 1);
1090       std::advance(ILHS, 1);
1091       std::advance(IRHS, 1);
1092     }
1093   }
1094   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
1095   unsigned Count = 0;
1096   auto ILHS = LHSs.begin();
1097   auto IRHS = RHSs.begin();
1098   auto IPriv = Privates.begin();
1099   for (const Expr *IRef : Shareds) {
1100     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1101     // Emit private VarDecl with reduction init.
1102     RedCG.emitSharedLValue(*this, Count);
1103     RedCG.emitAggregateType(*this, Count);
1104     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1105     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1106                              RedCG.getSharedLValue(Count),
1107                              [&Emission](CodeGenFunction &CGF) {
1108                                CGF.EmitAutoVarInit(Emission);
1109                                return true;
1110                              });
1111     EmitAutoVarCleanups(Emission);
1112     Address BaseAddr = RedCG.adjustPrivateAddress(
1113         *this, Count, Emission.getAllocatedAddress());
1114     bool IsRegistered = PrivateScope.addPrivate(
1115         RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1116     assert(IsRegistered && "private var already registered as private");
1117     // Silence the warning about unused variable.
1118     (void)IsRegistered;
1119 
1120     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1121     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1122     QualType Type = PrivateVD->getType();
1123     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1124     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1125       // Store the address of the original variable associated with the LHS
1126       // implicit variable.
1127       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
1128         return RedCG.getSharedLValue(Count).getAddress();
1129       });
1130       PrivateScope.addPrivate(
1131           RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1132     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1133                isa<ArraySubscriptExpr>(IRef)) {
1134       // Store the address of the original variable associated with the LHS
1135       // implicit variable.
1136       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
1137         return RedCG.getSharedLValue(Count).getAddress();
1138       });
1139       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1140         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1141                                             ConvertTypeForMem(RHSVD->getType()),
1142                                             "rhs.begin");
1143       });
1144     } else {
1145       QualType Type = PrivateVD->getType();
1146       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1147       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1148       // Store the address of the original variable associated with the LHS
1149       // implicit variable.
1150       if (IsArray) {
1151         OriginalAddr = Builder.CreateElementBitCast(
1152             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1153       }
1154       PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1155       PrivateScope.addPrivate(
1156           RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1157             return IsArray
1158                        ? Builder.CreateElementBitCast(
1159                              GetAddrOfLocalVar(PrivateVD),
1160                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1161                        : GetAddrOfLocalVar(PrivateVD);
1162           });
1163     }
1164     ++ILHS;
1165     ++IRHS;
1166     ++IPriv;
1167     ++Count;
1168   }
1169 }
1170 
1171 void CodeGenFunction::EmitOMPReductionClauseFinal(
1172     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1173   if (!HaveInsertPoint())
1174     return;
1175   llvm::SmallVector<const Expr *, 8> Privates;
1176   llvm::SmallVector<const Expr *, 8> LHSExprs;
1177   llvm::SmallVector<const Expr *, 8> RHSExprs;
1178   llvm::SmallVector<const Expr *, 8> ReductionOps;
1179   bool HasAtLeastOneReduction = false;
1180   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1181     HasAtLeastOneReduction = true;
1182     Privates.append(C->privates().begin(), C->privates().end());
1183     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1184     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1185     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1186   }
1187   if (HasAtLeastOneReduction) {
1188     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1189                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1190                       ReductionKind == OMPD_simd;
1191     bool SimpleReduction = ReductionKind == OMPD_simd;
1192     // Emit nowait reduction if nowait clause is present or directive is a
1193     // parallel directive (it always has implicit barrier).
1194     CGM.getOpenMPRuntime().emitReduction(
1195         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1196         {WithNowait, SimpleReduction, ReductionKind});
1197   }
1198 }
1199 
1200 static void emitPostUpdateForReductionClause(
1201     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1202     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1203   if (!CGF.HaveInsertPoint())
1204     return;
1205   llvm::BasicBlock *DoneBB = nullptr;
1206   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1207     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1208       if (!DoneBB) {
1209         if (llvm::Value *Cond = CondGen(CGF)) {
1210           // If the first post-update expression is found, emit conditional
1211           // block if it was requested.
1212           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1213           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1214           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1215           CGF.EmitBlock(ThenBB);
1216         }
1217       }
1218       CGF.EmitIgnoredExpr(PostUpdate);
1219     }
1220   }
1221   if (DoneBB)
1222     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1223 }
1224 
1225 namespace {
1226 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1227 /// parallel function. This is necessary for combined constructs such as
1228 /// 'distribute parallel for'
1229 typedef llvm::function_ref<void(CodeGenFunction &,
1230                                 const OMPExecutableDirective &,
1231                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1232     CodeGenBoundParametersTy;
1233 } // anonymous namespace
1234 
1235 static void emitCommonOMPParallelDirective(
1236     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1237     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1238     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1239   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1240   llvm::Function *OutlinedFn =
1241       CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1242           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1243   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1244     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1245     llvm::Value *NumThreads =
1246         CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1247                            /*IgnoreResultAssign=*/true);
1248     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1249         CGF, NumThreads, NumThreadsClause->getBeginLoc());
1250   }
1251   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1252     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1253     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1254         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1255   }
1256   const Expr *IfCond = nullptr;
1257   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1258     if (C->getNameModifier() == OMPD_unknown ||
1259         C->getNameModifier() == OMPD_parallel) {
1260       IfCond = C->getCondition();
1261       break;
1262     }
1263   }
1264 
1265   OMPParallelScope Scope(CGF, S);
1266   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1267   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1268   // lower and upper bounds with the pragma 'for' chunking mechanism.
1269   // The following lambda takes care of appending the lower and upper bound
1270   // parameters when necessary
1271   CodeGenBoundParameters(CGF, S, CapturedVars);
1272   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1273   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1274                                               CapturedVars, IfCond);
1275 }
1276 
1277 static void emitEmptyBoundParameters(CodeGenFunction &,
1278                                      const OMPExecutableDirective &,
1279                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1280 
1281 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1282   // Emit parallel region as a standalone region.
1283   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1284     Action.Enter(CGF);
1285     OMPPrivateScope PrivateScope(CGF);
1286     bool Copyins = CGF.EmitOMPCopyinClause(S);
1287     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1288     if (Copyins) {
1289       // Emit implicit barrier to synchronize threads and avoid data races on
1290       // propagation master's thread values of threadprivate variables to local
1291       // instances of that variables of all other implicit threads.
1292       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1293           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1294           /*ForceSimpleCall=*/true);
1295     }
1296     CGF.EmitOMPPrivateClause(S, PrivateScope);
1297     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1298     (void)PrivateScope.Privatize();
1299     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1300     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1301   };
1302   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1303                                  emitEmptyBoundParameters);
1304   emitPostUpdateForReductionClause(*this, S,
1305                                    [](CodeGenFunction &) { return nullptr; });
1306 }
1307 
1308 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1309                                       JumpDest LoopExit) {
1310   RunCleanupsScope BodyScope(*this);
1311   // Update counters values on current iteration.
1312   for (const Expr *UE : D.updates())
1313     EmitIgnoredExpr(UE);
1314   // Update the linear variables.
1315   // In distribute directives only loop counters may be marked as linear, no
1316   // need to generate the code for them.
1317   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1318     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1319       for (const Expr *UE : C->updates())
1320         EmitIgnoredExpr(UE);
1321     }
1322   }
1323 
1324   // On a continue in the body, jump to the end.
1325   JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1326   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1327   // Emit loop body.
1328   EmitStmt(D.getBody());
1329   // The end (updates/cleanups).
1330   EmitBlock(Continue.getBlock());
1331   BreakContinueStack.pop_back();
1332 }
1333 
1334 void CodeGenFunction::EmitOMPInnerLoop(
1335     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1336     const Expr *IncExpr,
1337     const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1338     const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1339   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1340 
1341   // Start the loop with a block that tests the condition.
1342   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1343   EmitBlock(CondBlock);
1344   const SourceRange R = S.getSourceRange();
1345   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1346                  SourceLocToDebugLoc(R.getEnd()));
1347 
1348   // If there are any cleanups between here and the loop-exit scope,
1349   // create a block to stage a loop exit along.
1350   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1351   if (RequiresCleanup)
1352     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1353 
1354   llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
1355 
1356   // Emit condition.
1357   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1358   if (ExitBlock != LoopExit.getBlock()) {
1359     EmitBlock(ExitBlock);
1360     EmitBranchThroughCleanup(LoopExit);
1361   }
1362 
1363   EmitBlock(LoopBody);
1364   incrementProfileCounter(&S);
1365 
1366   // Create a block for the increment.
1367   JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1368   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1369 
1370   BodyGen(*this);
1371 
1372   // Emit "IV = IV + 1" and a back-edge to the condition block.
1373   EmitBlock(Continue.getBlock());
1374   EmitIgnoredExpr(IncExpr);
1375   PostIncGen(*this);
1376   BreakContinueStack.pop_back();
1377   EmitBranch(CondBlock);
1378   LoopStack.pop();
1379   // Emit the fall-through block.
1380   EmitBlock(LoopExit.getBlock());
1381 }
1382 
1383 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1384   if (!HaveInsertPoint())
1385     return false;
1386   // Emit inits for the linear variables.
1387   bool HasLinears = false;
1388   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1389     for (const Expr *Init : C->inits()) {
1390       HasLinears = true;
1391       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1392       if (const auto *Ref =
1393               dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1394         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1395         const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1396         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1397                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1398                         VD->getInit()->getType(), VK_LValue,
1399                         VD->getInit()->getExprLoc());
1400         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1401                                                 VD->getType()),
1402                        /*capturedByInit=*/false);
1403         EmitAutoVarCleanups(Emission);
1404       } else {
1405         EmitVarDecl(*VD);
1406       }
1407     }
1408     // Emit the linear steps for the linear clauses.
1409     // If a step is not constant, it is pre-calculated before the loop.
1410     if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1411       if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1412         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1413         // Emit calculation of the linear step.
1414         EmitIgnoredExpr(CS);
1415       }
1416   }
1417   return HasLinears;
1418 }
1419 
1420 void CodeGenFunction::EmitOMPLinearClauseFinal(
1421     const OMPLoopDirective &D,
1422     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1423   if (!HaveInsertPoint())
1424     return;
1425   llvm::BasicBlock *DoneBB = nullptr;
1426   // Emit the final values of the linear variables.
1427   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1428     auto IC = C->varlist_begin();
1429     for (const Expr *F : C->finals()) {
1430       if (!DoneBB) {
1431         if (llvm::Value *Cond = CondGen(*this)) {
1432           // If the first post-update expression is found, emit conditional
1433           // block if it was requested.
1434           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
1435           DoneBB = createBasicBlock(".omp.linear.pu.done");
1436           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1437           EmitBlock(ThenBB);
1438         }
1439       }
1440       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1441       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1442                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1443                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1444       Address OrigAddr = EmitLValue(&DRE).getAddress();
1445       CodeGenFunction::OMPPrivateScope VarScope(*this);
1446       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1447       (void)VarScope.Privatize();
1448       EmitIgnoredExpr(F);
1449       ++IC;
1450     }
1451     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1452       EmitIgnoredExpr(PostUpdate);
1453   }
1454   if (DoneBB)
1455     EmitBlock(DoneBB, /*IsFinished=*/true);
1456 }
1457 
1458 static void emitAlignedClause(CodeGenFunction &CGF,
1459                               const OMPExecutableDirective &D) {
1460   if (!CGF.HaveInsertPoint())
1461     return;
1462   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1463     llvm::APInt ClauseAlignment(64, 0);
1464     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
1465       auto *AlignmentCI =
1466           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1467       ClauseAlignment = AlignmentCI->getValue();
1468     }
1469     for (const Expr *E : Clause->varlists()) {
1470       llvm::APInt Alignment(ClauseAlignment);
1471       if (Alignment == 0) {
1472         // OpenMP [2.8.1, Description]
1473         // If no optional parameter is specified, implementation-defined default
1474         // alignments for SIMD instructions on the target platforms are assumed.
1475         Alignment =
1476             CGF.getContext()
1477                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1478                     E->getType()->getPointeeType()))
1479                 .getQuantity();
1480       }
1481       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
1482              "alignment is not power of 2");
1483       if (Alignment != 0) {
1484         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1485         CGF.EmitAlignmentAssumption(
1486             PtrValue, E, /*No second loc needed*/ SourceLocation(),
1487             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
1488       }
1489     }
1490   }
1491 }
1492 
1493 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1494     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1495   if (!HaveInsertPoint())
1496     return;
1497   auto I = S.private_counters().begin();
1498   for (const Expr *E : S.counters()) {
1499     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1500     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1501     // Emit var without initialization.
1502     AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
1503     EmitAutoVarCleanups(VarEmission);
1504     LocalDeclMap.erase(PrivateVD);
1505     (void)LoopScope.addPrivate(VD, [&VarEmission]() {
1506       return VarEmission.getAllocatedAddress();
1507     });
1508     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1509         VD->hasGlobalStorage()) {
1510       (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
1511         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
1512                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1513                         E->getType(), VK_LValue, E->getExprLoc());
1514         return EmitLValue(&DRE).getAddress();
1515       });
1516     } else {
1517       (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
1518         return VarEmission.getAllocatedAddress();
1519       });
1520     }
1521     ++I;
1522   }
1523   // Privatize extra loop counters used in loops for ordered(n) clauses.
1524   for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
1525     if (!C->getNumForLoops())
1526       continue;
1527     for (unsigned I = S.getCollapsedNumber(),
1528                   E = C->getLoopNumIterations().size();
1529          I < E; ++I) {
1530       const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
1531       const auto *VD = cast<VarDecl>(DRE->getDecl());
1532       // Override only those variables that can be captured to avoid re-emission
1533       // of the variables declared within the loops.
1534       if (DRE->refersToEnclosingVariableOrCapture()) {
1535         (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
1536           return CreateMemTemp(DRE->getType(), VD->getName());
1537         });
1538       }
1539     }
1540   }
1541 }
1542 
1543 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1544                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1545                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1546   if (!CGF.HaveInsertPoint())
1547     return;
1548   {
1549     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1550     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1551     (void)PreCondScope.Privatize();
1552     // Get initial values of real counters.
1553     for (const Expr *I : S.inits()) {
1554       CGF.EmitIgnoredExpr(I);
1555     }
1556   }
1557   // Check that loop is executed at least one time.
1558   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1559 }
1560 
1561 void CodeGenFunction::EmitOMPLinearClause(
1562     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1563   if (!HaveInsertPoint())
1564     return;
1565   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1566   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1567     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1568     for (const Expr *C : LoopDirective->counters()) {
1569       SIMDLCVs.insert(
1570           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1571     }
1572   }
1573   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1574     auto CurPrivate = C->privates().begin();
1575     for (const Expr *E : C->varlists()) {
1576       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1577       const auto *PrivateVD =
1578           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1579       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1580         bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
1581           // Emit private VarDecl with copy init.
1582           EmitVarDecl(*PrivateVD);
1583           return GetAddrOfLocalVar(PrivateVD);
1584         });
1585         assert(IsRegistered && "linear var already registered as private");
1586         // Silence the warning about unused variable.
1587         (void)IsRegistered;
1588       } else {
1589         EmitVarDecl(*PrivateVD);
1590       }
1591       ++CurPrivate;
1592     }
1593   }
1594 }
1595 
1596 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1597                                      const OMPExecutableDirective &D,
1598                                      bool IsMonotonic) {
1599   if (!CGF.HaveInsertPoint())
1600     return;
1601   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1602     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1603                                  /*ignoreResult=*/true);
1604     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1605     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1606     // In presence of finite 'safelen', it may be unsafe to mark all
1607     // the memory instructions parallel, because loop-carried
1608     // dependences of 'safelen' iterations are possible.
1609     if (!IsMonotonic)
1610       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1611   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1612     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1613                                  /*ignoreResult=*/true);
1614     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1615     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1616     // In presence of finite 'safelen', it may be unsafe to mark all
1617     // the memory instructions parallel, because loop-carried
1618     // dependences of 'safelen' iterations are possible.
1619     CGF.LoopStack.setParallel(/*Enable=*/false);
1620   }
1621 }
1622 
1623 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1624                                       bool IsMonotonic) {
1625   // Walk clauses and process safelen/lastprivate.
1626   LoopStack.setParallel(!IsMonotonic);
1627   LoopStack.setVectorizeEnable();
1628   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1629 }
1630 
1631 void CodeGenFunction::EmitOMPSimdFinal(
1632     const OMPLoopDirective &D,
1633     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1634   if (!HaveInsertPoint())
1635     return;
1636   llvm::BasicBlock *DoneBB = nullptr;
1637   auto IC = D.counters().begin();
1638   auto IPC = D.private_counters().begin();
1639   for (const Expr *F : D.finals()) {
1640     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1641     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1642     const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1643     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1644         OrigVD->hasGlobalStorage() || CED) {
1645       if (!DoneBB) {
1646         if (llvm::Value *Cond = CondGen(*this)) {
1647           // If the first post-update expression is found, emit conditional
1648           // block if it was requested.
1649           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
1650           DoneBB = createBasicBlock(".omp.final.done");
1651           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1652           EmitBlock(ThenBB);
1653         }
1654       }
1655       Address OrigAddr = Address::invalid();
1656       if (CED) {
1657         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1658       } else {
1659         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
1660                         /*RefersToEnclosingVariableOrCapture=*/false,
1661                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1662         OrigAddr = EmitLValue(&DRE).getAddress();
1663       }
1664       OMPPrivateScope VarScope(*this);
1665       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1666       (void)VarScope.Privatize();
1667       EmitIgnoredExpr(F);
1668     }
1669     ++IC;
1670     ++IPC;
1671   }
1672   if (DoneBB)
1673     EmitBlock(DoneBB, /*IsFinished=*/true);
1674 }
1675 
1676 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1677                                          const OMPLoopDirective &S,
1678                                          CodeGenFunction::JumpDest LoopExit) {
1679   CGF.EmitOMPLoopBody(S, LoopExit);
1680   CGF.EmitStopPoint(&S);
1681 }
1682 
1683 /// Emit a helper variable and return corresponding lvalue.
1684 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1685                                const DeclRefExpr *Helper) {
1686   auto VDecl = cast<VarDecl>(Helper->getDecl());
1687   CGF.EmitVarDecl(*VDecl);
1688   return CGF.EmitLValue(Helper);
1689 }
1690 
1691 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1692                               PrePostActionTy &Action) {
1693   Action.Enter(CGF);
1694   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1695          "Expected simd directive");
1696   OMPLoopScope PreInitScope(CGF, S);
1697   // if (PreCond) {
1698   //   for (IV in 0..LastIteration) BODY;
1699   //   <Final counter/linear vars updates>;
1700   // }
1701   //
1702   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
1703       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
1704       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
1705     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1706     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1707   }
1708 
1709   // Emit: if (PreCond) - begin.
1710   // If the condition constant folds and can be elided, avoid emitting the
1711   // whole loop.
1712   bool CondConstant;
1713   llvm::BasicBlock *ContBlock = nullptr;
1714   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1715     if (!CondConstant)
1716       return;
1717   } else {
1718     llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
1719     ContBlock = CGF.createBasicBlock("simd.if.end");
1720     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1721                 CGF.getProfileCount(&S));
1722     CGF.EmitBlock(ThenBlock);
1723     CGF.incrementProfileCounter(&S);
1724   }
1725 
1726   // Emit the loop iteration variable.
1727   const Expr *IVExpr = S.getIterationVariable();
1728   const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1729   CGF.EmitVarDecl(*IVDecl);
1730   CGF.EmitIgnoredExpr(S.getInit());
1731 
1732   // Emit the iterations count variable.
1733   // If it is not a variable, Sema decided to calculate iterations count on
1734   // each iteration (e.g., it is foldable into a constant).
1735   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1736     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1737     // Emit calculation of the iterations count.
1738     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1739   }
1740 
1741   CGF.EmitOMPSimdInit(S);
1742 
1743   emitAlignedClause(CGF, S);
1744   (void)CGF.EmitOMPLinearClauseInit(S);
1745   {
1746     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1747     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1748     CGF.EmitOMPLinearClause(S, LoopScope);
1749     CGF.EmitOMPPrivateClause(S, LoopScope);
1750     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1751     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1752     (void)LoopScope.Privatize();
1753     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
1754       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
1755     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1756                          S.getInc(),
1757                          [&S](CodeGenFunction &CGF) {
1758                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1759                            CGF.EmitStopPoint(&S);
1760                          },
1761                          [](CodeGenFunction &) {});
1762     CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
1763     // Emit final copy of the lastprivate variables at the end of loops.
1764     if (HasLastprivateClause)
1765       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1766     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1767     emitPostUpdateForReductionClause(CGF, S,
1768                                      [](CodeGenFunction &) { return nullptr; });
1769   }
1770   CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
1771   // Emit: if (PreCond) - end.
1772   if (ContBlock) {
1773     CGF.EmitBranch(ContBlock);
1774     CGF.EmitBlock(ContBlock, true);
1775   }
1776 }
1777 
1778 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1779   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1780     emitOMPSimdRegion(CGF, S, Action);
1781   };
1782   OMPLexicalScope Scope(*this, S, OMPD_unknown);
1783   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1784 }
1785 
1786 void CodeGenFunction::EmitOMPOuterLoop(
1787     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1788     CodeGenFunction::OMPPrivateScope &LoopScope,
1789     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1790     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1791     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1792   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
1793 
1794   const Expr *IVExpr = S.getIterationVariable();
1795   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1796   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1797 
1798   JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1799 
1800   // Start the loop with a block that tests the condition.
1801   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
1802   EmitBlock(CondBlock);
1803   const SourceRange R = S.getSourceRange();
1804   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1805                  SourceLocToDebugLoc(R.getEnd()));
1806 
1807   llvm::Value *BoolCondVal = nullptr;
1808   if (!DynamicOrOrdered) {
1809     // UB = min(UB, GlobalUB) or
1810     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1811     // 'distribute parallel for')
1812     EmitIgnoredExpr(LoopArgs.EUB);
1813     // IV = LB
1814     EmitIgnoredExpr(LoopArgs.Init);
1815     // IV < UB
1816     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1817   } else {
1818     BoolCondVal =
1819         RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
1820                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1821   }
1822 
1823   // If there are any cleanups between here and the loop-exit scope,
1824   // create a block to stage a loop exit along.
1825   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1826   if (LoopScope.requiresCleanups())
1827     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1828 
1829   llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
1830   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1831   if (ExitBlock != LoopExit.getBlock()) {
1832     EmitBlock(ExitBlock);
1833     EmitBranchThroughCleanup(LoopExit);
1834   }
1835   EmitBlock(LoopBody);
1836 
1837   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1838   // LB for loop condition and emitted it above).
1839   if (DynamicOrOrdered)
1840     EmitIgnoredExpr(LoopArgs.Init);
1841 
1842   // Create a block for the increment.
1843   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1844   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1845 
1846   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1847   // with dynamic/guided scheduling and without ordered clause.
1848   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1849     LoopStack.setParallel(!IsMonotonic);
1850   else
1851     EmitOMPSimdInit(S, IsMonotonic);
1852 
1853   SourceLocation Loc = S.getBeginLoc();
1854 
1855   // when 'distribute' is not combined with a 'for':
1856   // while (idx <= UB) { BODY; ++idx; }
1857   // when 'distribute' is combined with a 'for'
1858   // (e.g. 'distribute parallel for')
1859   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1860   EmitOMPInnerLoop(
1861       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1862       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1863         CodeGenLoop(CGF, S, LoopExit);
1864       },
1865       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1866         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1867       });
1868 
1869   EmitBlock(Continue.getBlock());
1870   BreakContinueStack.pop_back();
1871   if (!DynamicOrOrdered) {
1872     // Emit "LB = LB + Stride", "UB = UB + Stride".
1873     EmitIgnoredExpr(LoopArgs.NextLB);
1874     EmitIgnoredExpr(LoopArgs.NextUB);
1875   }
1876 
1877   EmitBranch(CondBlock);
1878   LoopStack.pop();
1879   // Emit the fall-through block.
1880   EmitBlock(LoopExit.getBlock());
1881 
1882   // Tell the runtime we are done.
1883   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1884     if (!DynamicOrOrdered)
1885       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
1886                                                      S.getDirectiveKind());
1887   };
1888   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1889 }
1890 
1891 void CodeGenFunction::EmitOMPForOuterLoop(
1892     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1893     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1894     const OMPLoopArguments &LoopArgs,
1895     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1896   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
1897 
1898   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1899   const bool DynamicOrOrdered =
1900       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1901 
1902   assert((Ordered ||
1903           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1904                                  LoopArgs.Chunk != nullptr)) &&
1905          "static non-chunked schedule does not need outer loop");
1906 
1907   // Emit outer loop.
1908   //
1909   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1910   // When schedule(dynamic,chunk_size) is specified, the iterations are
1911   // distributed to threads in the team in chunks as the threads request them.
1912   // Each thread executes a chunk of iterations, then requests another chunk,
1913   // until no chunks remain to be distributed. Each chunk contains chunk_size
1914   // iterations, except for the last chunk to be distributed, which may have
1915   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1916   //
1917   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1918   // to threads in the team in chunks as the executing threads request them.
1919   // Each thread executes a chunk of iterations, then requests another chunk,
1920   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1921   // each chunk is proportional to the number of unassigned iterations divided
1922   // by the number of threads in the team, decreasing to 1. For a chunk_size
1923   // with value k (greater than 1), the size of each chunk is determined in the
1924   // same way, with the restriction that the chunks do not contain fewer than k
1925   // iterations (except for the last chunk to be assigned, which may have fewer
1926   // than k iterations).
1927   //
1928   // When schedule(auto) is specified, the decision regarding scheduling is
1929   // delegated to the compiler and/or runtime system. The programmer gives the
1930   // implementation the freedom to choose any possible mapping of iterations to
1931   // threads in the team.
1932   //
1933   // When schedule(runtime) is specified, the decision regarding scheduling is
1934   // deferred until run time, and the schedule and chunk size are taken from the
1935   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1936   // implementation defined
1937   //
1938   // while(__kmpc_dispatch_next(&LB, &UB)) {
1939   //   idx = LB;
1940   //   while (idx <= UB) { BODY; ++idx;
1941   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1942   //   } // inner loop
1943   // }
1944   //
1945   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1946   // When schedule(static, chunk_size) is specified, iterations are divided into
1947   // chunks of size chunk_size, and the chunks are assigned to the threads in
1948   // the team in a round-robin fashion in the order of the thread number.
1949   //
1950   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1951   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1952   //   LB = LB + ST;
1953   //   UB = UB + ST;
1954   // }
1955   //
1956 
1957   const Expr *IVExpr = S.getIterationVariable();
1958   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1959   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1960 
1961   if (DynamicOrOrdered) {
1962     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
1963         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1964     llvm::Value *LBVal = DispatchBounds.first;
1965     llvm::Value *UBVal = DispatchBounds.second;
1966     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1967                                                              LoopArgs.Chunk};
1968     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
1969                            IVSigned, Ordered, DipatchRTInputValues);
1970   } else {
1971     CGOpenMPRuntime::StaticRTInput StaticInit(
1972         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1973         LoopArgs.ST, LoopArgs.Chunk);
1974     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
1975                          ScheduleKind, StaticInit);
1976   }
1977 
1978   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1979                                     const unsigned IVSize,
1980                                     const bool IVSigned) {
1981     if (Ordered) {
1982       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1983                                                             IVSigned);
1984     }
1985   };
1986 
1987   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1988                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1989   OuterLoopArgs.IncExpr = S.getInc();
1990   OuterLoopArgs.Init = S.getInit();
1991   OuterLoopArgs.Cond = S.getCond();
1992   OuterLoopArgs.NextLB = S.getNextLowerBound();
1993   OuterLoopArgs.NextUB = S.getNextUpperBound();
1994   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1995                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1996 }
1997 
1998 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1999                              const unsigned IVSize, const bool IVSigned) {}
2000 
2001 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2002     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2003     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2004     const CodeGenLoopTy &CodeGenLoopContent) {
2005 
2006   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2007 
2008   // Emit outer loop.
2009   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2010   // dynamic
2011   //
2012 
2013   const Expr *IVExpr = S.getIterationVariable();
2014   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2015   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2016 
2017   CGOpenMPRuntime::StaticRTInput StaticInit(
2018       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2019       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2020   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2021 
2022   // for combined 'distribute' and 'for' the increment expression of distribute
2023   // is stored in DistInc. For 'distribute' alone, it is in Inc.
2024   Expr *IncExpr;
2025   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2026     IncExpr = S.getDistInc();
2027   else
2028     IncExpr = S.getInc();
2029 
2030   // this routine is shared by 'omp distribute parallel for' and
2031   // 'omp distribute': select the right EUB expression depending on the
2032   // directive
2033   OMPLoopArguments OuterLoopArgs;
2034   OuterLoopArgs.LB = LoopArgs.LB;
2035   OuterLoopArgs.UB = LoopArgs.UB;
2036   OuterLoopArgs.ST = LoopArgs.ST;
2037   OuterLoopArgs.IL = LoopArgs.IL;
2038   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2039   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2040                           ? S.getCombinedEnsureUpperBound()
2041                           : S.getEnsureUpperBound();
2042   OuterLoopArgs.IncExpr = IncExpr;
2043   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2044                            ? S.getCombinedInit()
2045                            : S.getInit();
2046   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2047                            ? S.getCombinedCond()
2048                            : S.getCond();
2049   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2050                              ? S.getCombinedNextLowerBound()
2051                              : S.getNextLowerBound();
2052   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2053                              ? S.getCombinedNextUpperBound()
2054                              : S.getNextUpperBound();
2055 
2056   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2057                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2058                    emitEmptyOrdered);
2059 }
2060 
2061 static std::pair<LValue, LValue>
2062 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2063                                      const OMPExecutableDirective &S) {
2064   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2065   LValue LB =
2066       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2067   LValue UB =
2068       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2069 
2070   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2071   // parallel for') we need to use the 'distribute'
2072   // chunk lower and upper bounds rather than the whole loop iteration
2073   // space. These are parameters to the outlined function for 'parallel'
2074   // and we copy the bounds of the previous schedule into the
2075   // the current ones.
2076   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2077   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2078   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2079       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2080   PrevLBVal = CGF.EmitScalarConversion(
2081       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2082       LS.getIterationVariable()->getType(),
2083       LS.getPrevLowerBoundVariable()->getExprLoc());
2084   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2085       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2086   PrevUBVal = CGF.EmitScalarConversion(
2087       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2088       LS.getIterationVariable()->getType(),
2089       LS.getPrevUpperBoundVariable()->getExprLoc());
2090 
2091   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2092   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2093 
2094   return {LB, UB};
2095 }
2096 
2097 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2098 /// we need to use the LB and UB expressions generated by the worksharing
2099 /// code generation support, whereas in non combined situations we would
2100 /// just emit 0 and the LastIteration expression
2101 /// This function is necessary due to the difference of the LB and UB
2102 /// types for the RT emission routines for 'for_static_init' and
2103 /// 'for_dispatch_init'
2104 static std::pair<llvm::Value *, llvm::Value *>
2105 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2106                                         const OMPExecutableDirective &S,
2107                                         Address LB, Address UB) {
2108   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2109   const Expr *IVExpr = LS.getIterationVariable();
2110   // when implementing a dynamic schedule for a 'for' combined with a
2111   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2112   // is not normalized as each team only executes its own assigned
2113   // distribute chunk
2114   QualType IteratorTy = IVExpr->getType();
2115   llvm::Value *LBVal =
2116       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2117   llvm::Value *UBVal =
2118       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2119   return {LBVal, UBVal};
2120 }
2121 
2122 static void emitDistributeParallelForDistributeInnerBoundParams(
2123     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2124     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2125   const auto &Dir = cast<OMPLoopDirective>(S);
2126   LValue LB =
2127       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2128   llvm::Value *LBCast = CGF.Builder.CreateIntCast(
2129       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2130   CapturedVars.push_back(LBCast);
2131   LValue UB =
2132       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2133 
2134   llvm::Value *UBCast = CGF.Builder.CreateIntCast(
2135       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2136   CapturedVars.push_back(UBCast);
2137 }
2138 
2139 static void
2140 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2141                                  const OMPLoopDirective &S,
2142                                  CodeGenFunction::JumpDest LoopExit) {
2143   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2144                                          PrePostActionTy &Action) {
2145     Action.Enter(CGF);
2146     bool HasCancel = false;
2147     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2148       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2149         HasCancel = D->hasCancel();
2150       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2151         HasCancel = D->hasCancel();
2152       else if (const auto *D =
2153                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2154         HasCancel = D->hasCancel();
2155     }
2156     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2157                                                      HasCancel);
2158     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2159                                emitDistributeParallelForInnerBounds,
2160                                emitDistributeParallelForDispatchBounds);
2161   };
2162 
2163   emitCommonOMPParallelDirective(
2164       CGF, S,
2165       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2166       CGInlinedWorksharingLoop,
2167       emitDistributeParallelForDistributeInnerBoundParams);
2168 }
2169 
2170 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2171     const OMPDistributeParallelForDirective &S) {
2172   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2173     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2174                               S.getDistInc());
2175   };
2176   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2177   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2178 }
2179 
2180 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2181     const OMPDistributeParallelForSimdDirective &S) {
2182   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2183     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2184                               S.getDistInc());
2185   };
2186   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2187   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2188 }
2189 
2190 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2191     const OMPDistributeSimdDirective &S) {
2192   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2193     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2194   };
2195   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2196   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2197 }
2198 
2199 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2200     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2201   // Emit SPMD target parallel for region as a standalone region.
2202   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2203     emitOMPSimdRegion(CGF, S, Action);
2204   };
2205   llvm::Function *Fn;
2206   llvm::Constant *Addr;
2207   // Emit target region as a standalone region.
2208   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2209       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2210   assert(Fn && Addr && "Target device function emission failed.");
2211 }
2212 
2213 void CodeGenFunction::EmitOMPTargetSimdDirective(
2214     const OMPTargetSimdDirective &S) {
2215   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2216     emitOMPSimdRegion(CGF, S, Action);
2217   };
2218   emitCommonOMPTargetDirective(*this, S, CodeGen);
2219 }
2220 
2221 namespace {
2222   struct ScheduleKindModifiersTy {
2223     OpenMPScheduleClauseKind Kind;
2224     OpenMPScheduleClauseModifier M1;
2225     OpenMPScheduleClauseModifier M2;
2226     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2227                             OpenMPScheduleClauseModifier M1,
2228                             OpenMPScheduleClauseModifier M2)
2229         : Kind(Kind), M1(M1), M2(M2) {}
2230   };
2231 } // namespace
2232 
2233 bool CodeGenFunction::EmitOMPWorksharingLoop(
2234     const OMPLoopDirective &S, Expr *EUB,
2235     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2236     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2237   // Emit the loop iteration variable.
2238   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2239   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
2240   EmitVarDecl(*IVDecl);
2241 
2242   // Emit the iterations count variable.
2243   // If it is not a variable, Sema decided to calculate iterations count on each
2244   // iteration (e.g., it is foldable into a constant).
2245   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2246     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2247     // Emit calculation of the iterations count.
2248     EmitIgnoredExpr(S.getCalcLastIteration());
2249   }
2250 
2251   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2252 
2253   bool HasLastprivateClause;
2254   // Check pre-condition.
2255   {
2256     OMPLoopScope PreInitScope(*this, S);
2257     // Skip the entire loop if we don't meet the precondition.
2258     // If the condition constant folds and can be elided, avoid emitting the
2259     // whole loop.
2260     bool CondConstant;
2261     llvm::BasicBlock *ContBlock = nullptr;
2262     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2263       if (!CondConstant)
2264         return false;
2265     } else {
2266       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
2267       ContBlock = createBasicBlock("omp.precond.end");
2268       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2269                   getProfileCount(&S));
2270       EmitBlock(ThenBlock);
2271       incrementProfileCounter(&S);
2272     }
2273 
2274     RunCleanupsScope DoacrossCleanupScope(*this);
2275     bool Ordered = false;
2276     if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2277       if (OrderedClause->getNumForLoops())
2278         RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
2279       else
2280         Ordered = true;
2281     }
2282 
2283     llvm::DenseSet<const Expr *> EmittedFinals;
2284     emitAlignedClause(*this, S);
2285     bool HasLinears = EmitOMPLinearClauseInit(S);
2286     // Emit helper vars inits.
2287 
2288     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2289     LValue LB = Bounds.first;
2290     LValue UB = Bounds.second;
2291     LValue ST =
2292         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2293     LValue IL =
2294         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2295 
2296     // Emit 'then' code.
2297     {
2298       OMPPrivateScope LoopScope(*this);
2299       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2300         // Emit implicit barrier to synchronize threads and avoid data races on
2301         // initialization of firstprivate variables and post-update of
2302         // lastprivate variables.
2303         CGM.getOpenMPRuntime().emitBarrierCall(
2304             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
2305             /*ForceSimpleCall=*/true);
2306       }
2307       EmitOMPPrivateClause(S, LoopScope);
2308       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2309       EmitOMPReductionClauseInit(S, LoopScope);
2310       EmitOMPPrivateLoopCounters(S, LoopScope);
2311       EmitOMPLinearClause(S, LoopScope);
2312       (void)LoopScope.Privatize();
2313       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2314         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
2315 
2316       // Detect the loop schedule kind and chunk.
2317       const Expr *ChunkExpr = nullptr;
2318       OpenMPScheduleTy ScheduleKind;
2319       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
2320         ScheduleKind.Schedule = C->getScheduleKind();
2321         ScheduleKind.M1 = C->getFirstScheduleModifier();
2322         ScheduleKind.M2 = C->getSecondScheduleModifier();
2323         ChunkExpr = C->getChunkSize();
2324       } else {
2325         // Default behaviour for schedule clause.
2326         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
2327             *this, S, ScheduleKind.Schedule, ChunkExpr);
2328       }
2329       bool HasChunkSizeOne = false;
2330       llvm::Value *Chunk = nullptr;
2331       if (ChunkExpr) {
2332         Chunk = EmitScalarExpr(ChunkExpr);
2333         Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
2334                                      S.getIterationVariable()->getType(),
2335                                      S.getBeginLoc());
2336         Expr::EvalResult Result;
2337         if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
2338           llvm::APSInt EvaluatedChunk = Result.Val.getInt();
2339           HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
2340         }
2341       }
2342       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2343       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2344       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2345       // If the static schedule kind is specified or if the ordered clause is
2346       // specified, and if no monotonic modifier is specified, the effect will
2347       // be as if the monotonic modifier was specified.
2348       bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
2349           /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
2350           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
2351       if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
2352                                  /* Chunked */ Chunk != nullptr) ||
2353            StaticChunkedOne) &&
2354           !Ordered) {
2355         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2356           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2357         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2358         // When no chunk_size is specified, the iteration space is divided into
2359         // chunks that are approximately equal in size, and at most one chunk is
2360         // distributed to each thread. Note that the size of the chunks is
2361         // unspecified in this case.
2362         CGOpenMPRuntime::StaticRTInput StaticInit(
2363             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2364             UB.getAddress(), ST.getAddress(),
2365             StaticChunkedOne ? Chunk : nullptr);
2366         RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2367                              ScheduleKind, StaticInit);
2368         JumpDest LoopExit =
2369             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2370         // UB = min(UB, GlobalUB);
2371         if (!StaticChunkedOne)
2372           EmitIgnoredExpr(S.getEnsureUpperBound());
2373         // IV = LB;
2374         EmitIgnoredExpr(S.getInit());
2375         // For unchunked static schedule generate:
2376         //
2377         // while (idx <= UB) {
2378         //   BODY;
2379         //   ++idx;
2380         // }
2381         //
2382         // For static schedule with chunk one:
2383         //
2384         // while (IV <= PrevUB) {
2385         //   BODY;
2386         //   IV += ST;
2387         // }
2388         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
2389             StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
2390             StaticChunkedOne ? S.getDistInc() : S.getInc(),
2391             [&S, LoopExit](CodeGenFunction &CGF) {
2392              CGF.EmitOMPLoopBody(S, LoopExit);
2393              CGF.EmitStopPoint(&S);
2394             },
2395             [](CodeGenFunction &) {});
2396         EmitBlock(LoopExit.getBlock());
2397         // Tell the runtime we are done.
2398         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2399           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2400                                                          S.getDirectiveKind());
2401         };
2402         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2403       } else {
2404         const bool IsMonotonic =
2405             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2406             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2407             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2408             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2409         // Emit the outer loop, which requests its work chunk [LB..UB] from
2410         // runtime and runs the inner loop to process it.
2411         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2412                                              ST.getAddress(), IL.getAddress(),
2413                                              Chunk, EUB);
2414         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2415                             LoopArguments, CGDispatchBounds);
2416       }
2417       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2418         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
2419           return CGF.Builder.CreateIsNotNull(
2420               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
2421         });
2422       }
2423       EmitOMPReductionClauseFinal(
2424           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2425                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2426                  : /*Parallel only*/ OMPD_parallel);
2427       // Emit post-update of the reduction variables if IsLastIter != 0.
2428       emitPostUpdateForReductionClause(
2429           *this, S, [IL, &S](CodeGenFunction &CGF) {
2430             return CGF.Builder.CreateIsNotNull(
2431                 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
2432           });
2433       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2434       if (HasLastprivateClause)
2435         EmitOMPLastprivateClauseFinal(
2436             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2437             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
2438     }
2439     EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
2440       return CGF.Builder.CreateIsNotNull(
2441           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
2442     });
2443     DoacrossCleanupScope.ForceCleanup();
2444     // We're now done with the loop, so jump to the continuation block.
2445     if (ContBlock) {
2446       EmitBranch(ContBlock);
2447       EmitBlock(ContBlock, /*IsFinished=*/true);
2448     }
2449   }
2450   return HasLastprivateClause;
2451 }
2452 
2453 /// The following two functions generate expressions for the loop lower
2454 /// and upper bounds in case of static and dynamic (dispatch) schedule
2455 /// of the associated 'for' or 'distribute' loop.
2456 static std::pair<LValue, LValue>
2457 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2458   const auto &LS = cast<OMPLoopDirective>(S);
2459   LValue LB =
2460       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2461   LValue UB =
2462       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2463   return {LB, UB};
2464 }
2465 
2466 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2467 /// consider the lower and upper bound expressions generated by the
2468 /// worksharing loop support, but we use 0 and the iteration space size as
2469 /// constants
2470 static std::pair<llvm::Value *, llvm::Value *>
2471 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2472                           Address LB, Address UB) {
2473   const auto &LS = cast<OMPLoopDirective>(S);
2474   const Expr *IVExpr = LS.getIterationVariable();
2475   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2476   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2477   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2478   return {LBVal, UBVal};
2479 }
2480 
2481 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2482   bool HasLastprivates = false;
2483   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2484                                           PrePostActionTy &) {
2485     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2486     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2487                                                  emitForLoopBounds,
2488                                                  emitDispatchForLoopBounds);
2489   };
2490   {
2491     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2492     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2493                                                 S.hasCancel());
2494   }
2495 
2496   // Emit an implicit barrier at the end.
2497   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
2498     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
2499 }
2500 
2501 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2502   bool HasLastprivates = false;
2503   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2504                                           PrePostActionTy &) {
2505     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2506                                                  emitForLoopBounds,
2507                                                  emitDispatchForLoopBounds);
2508   };
2509   {
2510     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2511     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2512   }
2513 
2514   // Emit an implicit barrier at the end.
2515   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
2516     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
2517 }
2518 
2519 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2520                                 const Twine &Name,
2521                                 llvm::Value *Init = nullptr) {
2522   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2523   if (Init)
2524     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2525   return LVal;
2526 }
2527 
2528 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2529   const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
2530   const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
2531   bool HasLastprivates = false;
2532   auto &&CodeGen = [&S, CapturedStmt, CS,
2533                     &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
2534     ASTContext &C = CGF.getContext();
2535     QualType KmpInt32Ty =
2536         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2537     // Emit helper vars inits.
2538     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2539                                   CGF.Builder.getInt32(0));
2540     llvm::ConstantInt *GlobalUBVal = CS != nullptr
2541                                          ? CGF.Builder.getInt32(CS->size() - 1)
2542                                          : CGF.Builder.getInt32(0);
2543     LValue UB =
2544         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2545     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2546                                   CGF.Builder.getInt32(1));
2547     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2548                                   CGF.Builder.getInt32(0));
2549     // Loop counter.
2550     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2551     OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
2552     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2553     OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
2554     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2555     // Generate condition for loop.
2556     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2557                         OK_Ordinary, S.getBeginLoc(), FPOptions());
2558     // Increment for loop counter.
2559     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2560                       S.getBeginLoc(), true);
2561     auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
2562       // Iterate through all sections and emit a switch construct:
2563       // switch (IV) {
2564       //   case 0:
2565       //     <SectionStmt[0]>;
2566       //     break;
2567       // ...
2568       //   case <NumSection> - 1:
2569       //     <SectionStmt[<NumSection> - 1]>;
2570       //     break;
2571       // }
2572       // .omp.sections.exit:
2573       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2574       llvm::SwitchInst *SwitchStmt =
2575           CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
2576                                    ExitBB, CS == nullptr ? 1 : CS->size());
2577       if (CS) {
2578         unsigned CaseNumber = 0;
2579         for (const Stmt *SubStmt : CS->children()) {
2580           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2581           CGF.EmitBlock(CaseBB);
2582           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2583           CGF.EmitStmt(SubStmt);
2584           CGF.EmitBranch(ExitBB);
2585           ++CaseNumber;
2586         }
2587       } else {
2588         llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
2589         CGF.EmitBlock(CaseBB);
2590         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2591         CGF.EmitStmt(CapturedStmt);
2592         CGF.EmitBranch(ExitBB);
2593       }
2594       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2595     };
2596 
2597     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2598     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2599       // Emit implicit barrier to synchronize threads and avoid data races on
2600       // initialization of firstprivate variables and post-update of lastprivate
2601       // variables.
2602       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2603           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
2604           /*ForceSimpleCall=*/true);
2605     }
2606     CGF.EmitOMPPrivateClause(S, LoopScope);
2607     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2608     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2609     (void)LoopScope.Privatize();
2610     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2611       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2612 
2613     // Emit static non-chunked loop.
2614     OpenMPScheduleTy ScheduleKind;
2615     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2616     CGOpenMPRuntime::StaticRTInput StaticInit(
2617         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2618         LB.getAddress(), UB.getAddress(), ST.getAddress());
2619     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2620         CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2621     // UB = min(UB, GlobalUB);
2622     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
2623     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
2624         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2625     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2626     // IV = LB;
2627     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
2628     // while (idx <= UB) { BODY; ++idx; }
2629     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2630                          [](CodeGenFunction &) {});
2631     // Tell the runtime we are done.
2632     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2633       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2634                                                      S.getDirectiveKind());
2635     };
2636     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2637     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2638     // Emit post-update of the reduction variables if IsLastIter != 0.
2639     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
2640       return CGF.Builder.CreateIsNotNull(
2641           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
2642     });
2643 
2644     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2645     if (HasLastprivates)
2646       CGF.EmitOMPLastprivateClauseFinal(
2647           S, /*NoFinals=*/false,
2648           CGF.Builder.CreateIsNotNull(
2649               CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
2650   };
2651 
2652   bool HasCancel = false;
2653   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2654     HasCancel = OSD->hasCancel();
2655   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2656     HasCancel = OPSD->hasCancel();
2657   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2658   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2659                                               HasCancel);
2660   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2661   // clause. Otherwise the barrier will be generated by the codegen for the
2662   // directive.
2663   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2664     // Emit implicit barrier to synchronize threads and avoid data races on
2665     // initialization of firstprivate variables.
2666     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
2667                                            OMPD_unknown);
2668   }
2669 }
2670 
2671 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2672   {
2673     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2674     EmitSections(S);
2675   }
2676   // Emit an implicit barrier at the end.
2677   if (!S.getSingleClause<OMPNowaitClause>()) {
2678     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
2679                                            OMPD_sections);
2680   }
2681 }
2682 
2683 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2684   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2685     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2686   };
2687   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2688   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2689                                               S.hasCancel());
2690 }
2691 
2692 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2693   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2694   llvm::SmallVector<const Expr *, 8> DestExprs;
2695   llvm::SmallVector<const Expr *, 8> SrcExprs;
2696   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2697   // Check if there are any 'copyprivate' clauses associated with this
2698   // 'single' construct.
2699   // Build a list of copyprivate variables along with helper expressions
2700   // (<source>, <destination>, <destination>=<source> expressions)
2701   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2702     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2703     DestExprs.append(C->destination_exprs().begin(),
2704                      C->destination_exprs().end());
2705     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2706     AssignmentOps.append(C->assignment_ops().begin(),
2707                          C->assignment_ops().end());
2708   }
2709   // Emit code for 'single' region along with 'copyprivate' clauses
2710   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2711     Action.Enter(CGF);
2712     OMPPrivateScope SingleScope(CGF);
2713     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2714     CGF.EmitOMPPrivateClause(S, SingleScope);
2715     (void)SingleScope.Privatize();
2716     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2717   };
2718   {
2719     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2720     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
2721                                             CopyprivateVars, DestExprs,
2722                                             SrcExprs, AssignmentOps);
2723   }
2724   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2725   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2726   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2727     CGM.getOpenMPRuntime().emitBarrierCall(
2728         *this, S.getBeginLoc(),
2729         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2730   }
2731 }
2732 
2733 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2734   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2735     Action.Enter(CGF);
2736     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2737   };
2738   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2739   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
2740 }
2741 
2742 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2743   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2744     Action.Enter(CGF);
2745     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2746   };
2747   const Expr *Hint = nullptr;
2748   if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
2749     Hint = HintClause->getHint();
2750   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2751   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2752                                             S.getDirectiveName().getAsString(),
2753                                             CodeGen, S.getBeginLoc(), Hint);
2754 }
2755 
2756 void CodeGenFunction::EmitOMPParallelForDirective(
2757     const OMPParallelForDirective &S) {
2758   // Emit directive as a combined directive that consists of two implicit
2759   // directives: 'parallel' with 'for' directive.
2760   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2761     Action.Enter(CGF);
2762     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2763     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2764                                emitDispatchForLoopBounds);
2765   };
2766   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2767                                  emitEmptyBoundParameters);
2768 }
2769 
2770 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2771     const OMPParallelForSimdDirective &S) {
2772   // Emit directive as a combined directive that consists of two implicit
2773   // directives: 'parallel' with 'for' directive.
2774   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2775     Action.Enter(CGF);
2776     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2777                                emitDispatchForLoopBounds);
2778   };
2779   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2780                                  emitEmptyBoundParameters);
2781 }
2782 
2783 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2784     const OMPParallelSectionsDirective &S) {
2785   // Emit directive as a combined directive that consists of two implicit
2786   // directives: 'parallel' with 'sections' directive.
2787   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2788     Action.Enter(CGF);
2789     CGF.EmitSections(S);
2790   };
2791   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2792                                  emitEmptyBoundParameters);
2793 }
2794 
2795 void CodeGenFunction::EmitOMPTaskBasedDirective(
2796     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
2797     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
2798     OMPTaskDataTy &Data) {
2799   // Emit outlined function for task construct.
2800   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
2801   auto I = CS->getCapturedDecl()->param_begin();
2802   auto PartId = std::next(I);
2803   auto TaskT = std::next(I, 4);
2804   // Check if the task is final
2805   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2806     // If the condition constant folds and can be elided, try to avoid emitting
2807     // the condition and the dead arm of the if/else.
2808     const Expr *Cond = Clause->getCondition();
2809     bool CondConstant;
2810     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2811       Data.Final.setInt(CondConstant);
2812     else
2813       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2814   } else {
2815     // By default the task is not final.
2816     Data.Final.setInt(/*IntVal=*/false);
2817   }
2818   // Check if the task has 'priority' clause.
2819   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2820     const Expr *Prio = Clause->getPriority();
2821     Data.Priority.setInt(/*IntVal=*/true);
2822     Data.Priority.setPointer(EmitScalarConversion(
2823         EmitScalarExpr(Prio), Prio->getType(),
2824         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2825         Prio->getExprLoc()));
2826   }
2827   // The first function argument for tasks is a thread id, the second one is a
2828   // part id (0 for tied tasks, >=0 for untied task).
2829   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2830   // Get list of private variables.
2831   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2832     auto IRef = C->varlist_begin();
2833     for (const Expr *IInit : C->private_copies()) {
2834       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2835       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2836         Data.PrivateVars.push_back(*IRef);
2837         Data.PrivateCopies.push_back(IInit);
2838       }
2839       ++IRef;
2840     }
2841   }
2842   EmittedAsPrivate.clear();
2843   // Get list of firstprivate variables.
2844   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2845     auto IRef = C->varlist_begin();
2846     auto IElemInitRef = C->inits().begin();
2847     for (const Expr *IInit : C->private_copies()) {
2848       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2849       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2850         Data.FirstprivateVars.push_back(*IRef);
2851         Data.FirstprivateCopies.push_back(IInit);
2852         Data.FirstprivateInits.push_back(*IElemInitRef);
2853       }
2854       ++IRef;
2855       ++IElemInitRef;
2856     }
2857   }
2858   // Get list of lastprivate variables (for taskloops).
2859   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2860   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2861     auto IRef = C->varlist_begin();
2862     auto ID = C->destination_exprs().begin();
2863     for (const Expr *IInit : C->private_copies()) {
2864       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2865       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2866         Data.LastprivateVars.push_back(*IRef);
2867         Data.LastprivateCopies.push_back(IInit);
2868       }
2869       LastprivateDstsOrigs.insert(
2870           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2871            cast<DeclRefExpr>(*IRef)});
2872       ++IRef;
2873       ++ID;
2874     }
2875   }
2876   SmallVector<const Expr *, 4> LHSs;
2877   SmallVector<const Expr *, 4> RHSs;
2878   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2879     auto IPriv = C->privates().begin();
2880     auto IRed = C->reduction_ops().begin();
2881     auto ILHS = C->lhs_exprs().begin();
2882     auto IRHS = C->rhs_exprs().begin();
2883     for (const Expr *Ref : C->varlists()) {
2884       Data.ReductionVars.emplace_back(Ref);
2885       Data.ReductionCopies.emplace_back(*IPriv);
2886       Data.ReductionOps.emplace_back(*IRed);
2887       LHSs.emplace_back(*ILHS);
2888       RHSs.emplace_back(*IRHS);
2889       std::advance(IPriv, 1);
2890       std::advance(IRed, 1);
2891       std::advance(ILHS, 1);
2892       std::advance(IRHS, 1);
2893     }
2894   }
2895   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2896       *this, S.getBeginLoc(), LHSs, RHSs, Data);
2897   // Build list of dependences.
2898   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2899     for (const Expr *IRef : C->varlists())
2900       Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
2901   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2902                     CapturedRegion](CodeGenFunction &CGF,
2903                                     PrePostActionTy &Action) {
2904     // Set proper addresses for generated private copies.
2905     OMPPrivateScope Scope(CGF);
2906     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2907         !Data.LastprivateVars.empty()) {
2908       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
2909           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
2910       enum { PrivatesParam = 2, CopyFnParam = 3 };
2911       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
2912           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
2913       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
2914           CS->getCapturedDecl()->getParam(PrivatesParam)));
2915       // Map privates.
2916       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2917       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2918       CallArgs.push_back(PrivatesPtr);
2919       for (const Expr *E : Data.PrivateVars) {
2920         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2921         Address PrivatePtr = CGF.CreateMemTemp(
2922             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2923         PrivatePtrs.emplace_back(VD, PrivatePtr);
2924         CallArgs.push_back(PrivatePtr.getPointer());
2925       }
2926       for (const Expr *E : Data.FirstprivateVars) {
2927         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2928         Address PrivatePtr =
2929             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2930                               ".firstpriv.ptr.addr");
2931         PrivatePtrs.emplace_back(VD, PrivatePtr);
2932         CallArgs.push_back(PrivatePtr.getPointer());
2933       }
2934       for (const Expr *E : Data.LastprivateVars) {
2935         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2936         Address PrivatePtr =
2937             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2938                               ".lastpriv.ptr.addr");
2939         PrivatePtrs.emplace_back(VD, PrivatePtr);
2940         CallArgs.push_back(PrivatePtr.getPointer());
2941       }
2942       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
2943           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
2944       for (const auto &Pair : LastprivateDstsOrigs) {
2945         const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2946         DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
2947                         /*RefersToEnclosingVariableOrCapture=*/
2948                             CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
2949                         Pair.second->getType(), VK_LValue,
2950                         Pair.second->getExprLoc());
2951         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2952           return CGF.EmitLValue(&DRE).getAddress();
2953         });
2954       }
2955       for (const auto &Pair : PrivatePtrs) {
2956         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2957                             CGF.getContext().getDeclAlign(Pair.first));
2958         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2959       }
2960     }
2961     if (Data.Reductions) {
2962       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2963       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2964                              Data.ReductionOps);
2965       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2966           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2967       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2968         RedCG.emitSharedLValue(CGF, Cnt);
2969         RedCG.emitAggregateType(CGF, Cnt);
2970         // FIXME: This must removed once the runtime library is fixed.
2971         // Emit required threadprivate variables for
2972         // initializer/combiner/finalizer.
2973         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
2974                                                            RedCG, Cnt);
2975         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2976             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2977         Replacement =
2978             Address(CGF.EmitScalarConversion(
2979                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2980                         CGF.getContext().getPointerType(
2981                             Data.ReductionCopies[Cnt]->getType()),
2982                         Data.ReductionCopies[Cnt]->getExprLoc()),
2983                     Replacement.getAlignment());
2984         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2985         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2986                          [Replacement]() { return Replacement; });
2987       }
2988     }
2989     // Privatize all private variables except for in_reduction items.
2990     (void)Scope.Privatize();
2991     SmallVector<const Expr *, 4> InRedVars;
2992     SmallVector<const Expr *, 4> InRedPrivs;
2993     SmallVector<const Expr *, 4> InRedOps;
2994     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2995     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2996       auto IPriv = C->privates().begin();
2997       auto IRed = C->reduction_ops().begin();
2998       auto ITD = C->taskgroup_descriptors().begin();
2999       for (const Expr *Ref : C->varlists()) {
3000         InRedVars.emplace_back(Ref);
3001         InRedPrivs.emplace_back(*IPriv);
3002         InRedOps.emplace_back(*IRed);
3003         TaskgroupDescriptors.emplace_back(*ITD);
3004         std::advance(IPriv, 1);
3005         std::advance(IRed, 1);
3006         std::advance(ITD, 1);
3007       }
3008     }
3009     // Privatize in_reduction items here, because taskgroup descriptors must be
3010     // privatized earlier.
3011     OMPPrivateScope InRedScope(CGF);
3012     if (!InRedVars.empty()) {
3013       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
3014       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
3015         RedCG.emitSharedLValue(CGF, Cnt);
3016         RedCG.emitAggregateType(CGF, Cnt);
3017         // The taskgroup descriptor variable is always implicit firstprivate and
3018         // privatized already during processing of the firstprivates.
3019         // FIXME: This must removed once the runtime library is fixed.
3020         // Emit required threadprivate variables for
3021         // initializer/combiner/finalizer.
3022         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
3023                                                            RedCG, Cnt);
3024         llvm::Value *ReductionsPtr =
3025             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
3026                                  TaskgroupDescriptors[Cnt]->getExprLoc());
3027         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
3028             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
3029         Replacement = Address(
3030             CGF.EmitScalarConversion(
3031                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
3032                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
3033                 InRedPrivs[Cnt]->getExprLoc()),
3034             Replacement.getAlignment());
3035         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
3036         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
3037                               [Replacement]() { return Replacement; });
3038       }
3039     }
3040     (void)InRedScope.Privatize();
3041 
3042     Action.Enter(CGF);
3043     BodyGen(CGF);
3044   };
3045   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3046       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
3047       Data.NumberOfParts);
3048   OMPLexicalScope Scope(*this, S);
3049   TaskGen(*this, OutlinedFn, Data);
3050 }
3051 
3052 static ImplicitParamDecl *
3053 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
3054                                   QualType Ty, CapturedDecl *CD,
3055                                   SourceLocation Loc) {
3056   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
3057                                            ImplicitParamDecl::Other);
3058   auto *OrigRef = DeclRefExpr::Create(
3059       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
3060       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
3061   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
3062                                               ImplicitParamDecl::Other);
3063   auto *PrivateRef = DeclRefExpr::Create(
3064       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
3065       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
3066   QualType ElemType = C.getBaseElementType(Ty);
3067   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
3068                                            ImplicitParamDecl::Other);
3069   auto *InitRef = DeclRefExpr::Create(
3070       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
3071       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
3072   PrivateVD->setInitStyle(VarDecl::CInit);
3073   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
3074                                               InitRef, /*BasePath=*/nullptr,
3075                                               VK_RValue));
3076   Data.FirstprivateVars.emplace_back(OrigRef);
3077   Data.FirstprivateCopies.emplace_back(PrivateRef);
3078   Data.FirstprivateInits.emplace_back(InitRef);
3079   return OrigVD;
3080 }
3081 
3082 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
3083     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
3084     OMPTargetDataInfo &InputInfo) {
3085   // Emit outlined function for task construct.
3086   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3087   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
3088   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3089   auto I = CS->getCapturedDecl()->param_begin();
3090   auto PartId = std::next(I);
3091   auto TaskT = std::next(I, 4);
3092   OMPTaskDataTy Data;
3093   // The task is not final.
3094   Data.Final.setInt(/*IntVal=*/false);
3095   // Get list of firstprivate variables.
3096   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3097     auto IRef = C->varlist_begin();
3098     auto IElemInitRef = C->inits().begin();
3099     for (auto *IInit : C->private_copies()) {
3100       Data.FirstprivateVars.push_back(*IRef);
3101       Data.FirstprivateCopies.push_back(IInit);
3102       Data.FirstprivateInits.push_back(*IElemInitRef);
3103       ++IRef;
3104       ++IElemInitRef;
3105     }
3106   }
3107   OMPPrivateScope TargetScope(*this);
3108   VarDecl *BPVD = nullptr;
3109   VarDecl *PVD = nullptr;
3110   VarDecl *SVD = nullptr;
3111   if (InputInfo.NumberOfTargetItems > 0) {
3112     auto *CD = CapturedDecl::Create(
3113         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3114     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3115     QualType BaseAndPointersType = getContext().getConstantArrayType(
3116         getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
3117         /*IndexTypeQuals=*/0);
3118     BPVD = createImplicitFirstprivateForType(
3119         getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
3120     PVD = createImplicitFirstprivateForType(
3121         getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
3122     QualType SizesType = getContext().getConstantArrayType(
3123         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
3124         ArrSize, ArrayType::Normal,
3125         /*IndexTypeQuals=*/0);
3126     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
3127                                             S.getBeginLoc());
3128     TargetScope.addPrivate(
3129         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3130     TargetScope.addPrivate(PVD,
3131                            [&InputInfo]() { return InputInfo.PointersArray; });
3132     TargetScope.addPrivate(SVD,
3133                            [&InputInfo]() { return InputInfo.SizesArray; });
3134   }
3135   (void)TargetScope.Privatize();
3136   // Build list of dependences.
3137   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
3138     for (const Expr *IRef : C->varlists())
3139       Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
3140   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
3141                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3142     // Set proper addresses for generated private copies.
3143     OMPPrivateScope Scope(CGF);
3144     if (!Data.FirstprivateVars.empty()) {
3145       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3146           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3147       enum { PrivatesParam = 2, CopyFnParam = 3 };
3148       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3149           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3150       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3151           CS->getCapturedDecl()->getParam(PrivatesParam)));
3152       // Map privates.
3153       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3154       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3155       CallArgs.push_back(PrivatesPtr);
3156       for (const Expr *E : Data.FirstprivateVars) {
3157         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3158         Address PrivatePtr =
3159             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3160                               ".firstpriv.ptr.addr");
3161         PrivatePtrs.emplace_back(VD, PrivatePtr);
3162         CallArgs.push_back(PrivatePtr.getPointer());
3163       }
3164       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3165           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3166       for (const auto &Pair : PrivatePtrs) {
3167         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3168                             CGF.getContext().getDeclAlign(Pair.first));
3169         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3170       }
3171     }
3172     // Privatize all private variables except for in_reduction items.
3173     (void)Scope.Privatize();
3174     if (InputInfo.NumberOfTargetItems > 0) {
3175       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
3176           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
3177       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3178           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
3179       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3180           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
3181     }
3182 
3183     Action.Enter(CGF);
3184     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3185     BodyGen(CGF);
3186   };
3187   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3188       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
3189       Data.NumberOfParts);
3190   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
3191   IntegerLiteral IfCond(getContext(), TrueOrFalse,
3192                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3193                         SourceLocation());
3194 
3195   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
3196                                       SharedsTy, CapturedStruct, &IfCond, Data);
3197 }
3198 
3199 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3200   // Emit outlined function for task construct.
3201   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3202   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
3203   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3204   const Expr *IfCond = nullptr;
3205   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3206     if (C->getNameModifier() == OMPD_unknown ||
3207         C->getNameModifier() == OMPD_task) {
3208       IfCond = C->getCondition();
3209       break;
3210     }
3211   }
3212 
3213   OMPTaskDataTy Data;
3214   // Check if we should emit tied or untied task.
3215   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3216   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3217     CGF.EmitStmt(CS->getCapturedStmt());
3218   };
3219   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3220                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
3221                             const OMPTaskDataTy &Data) {
3222     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
3223                                             SharedsTy, CapturedStruct, IfCond,
3224                                             Data);
3225   };
3226   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
3227 }
3228 
3229 void CodeGenFunction::EmitOMPTaskyieldDirective(
3230     const OMPTaskyieldDirective &S) {
3231   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
3232 }
3233 
3234 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
3235   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
3236 }
3237 
3238 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
3239   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
3240 }
3241 
3242 void CodeGenFunction::EmitOMPTaskgroupDirective(
3243     const OMPTaskgroupDirective &S) {
3244   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3245     Action.Enter(CGF);
3246     if (const Expr *E = S.getReductionRef()) {
3247       SmallVector<const Expr *, 4> LHSs;
3248       SmallVector<const Expr *, 4> RHSs;
3249       OMPTaskDataTy Data;
3250       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3251         auto IPriv = C->privates().begin();
3252         auto IRed = C->reduction_ops().begin();
3253         auto ILHS = C->lhs_exprs().begin();
3254         auto IRHS = C->rhs_exprs().begin();
3255         for (const Expr *Ref : C->varlists()) {
3256           Data.ReductionVars.emplace_back(Ref);
3257           Data.ReductionCopies.emplace_back(*IPriv);
3258           Data.ReductionOps.emplace_back(*IRed);
3259           LHSs.emplace_back(*ILHS);
3260           RHSs.emplace_back(*IRHS);
3261           std::advance(IPriv, 1);
3262           std::advance(IRed, 1);
3263           std::advance(ILHS, 1);
3264           std::advance(IRHS, 1);
3265         }
3266       }
3267       llvm::Value *ReductionDesc =
3268           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
3269                                                            LHSs, RHSs, Data);
3270       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3271       CGF.EmitVarDecl(*VD);
3272       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3273                             /*Volatile=*/false, E->getType());
3274     }
3275     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3276   };
3277   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3278   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
3279 }
3280 
3281 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3282   CGM.getOpenMPRuntime().emitFlush(
3283       *this,
3284       [&S]() -> ArrayRef<const Expr *> {
3285         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
3286           return llvm::makeArrayRef(FlushClause->varlist_begin(),
3287                                     FlushClause->varlist_end());
3288         return llvm::None;
3289       }(),
3290       S.getBeginLoc());
3291 }
3292 
3293 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3294                                             const CodeGenLoopTy &CodeGenLoop,
3295                                             Expr *IncExpr) {
3296   // Emit the loop iteration variable.
3297   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3298   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3299   EmitVarDecl(*IVDecl);
3300 
3301   // Emit the iterations count variable.
3302   // If it is not a variable, Sema decided to calculate iterations count on each
3303   // iteration (e.g., it is foldable into a constant).
3304   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3305     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3306     // Emit calculation of the iterations count.
3307     EmitIgnoredExpr(S.getCalcLastIteration());
3308   }
3309 
3310   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3311 
3312   bool HasLastprivateClause = false;
3313   // Check pre-condition.
3314   {
3315     OMPLoopScope PreInitScope(*this, S);
3316     // Skip the entire loop if we don't meet the precondition.
3317     // If the condition constant folds and can be elided, avoid emitting the
3318     // whole loop.
3319     bool CondConstant;
3320     llvm::BasicBlock *ContBlock = nullptr;
3321     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3322       if (!CondConstant)
3323         return;
3324     } else {
3325       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3326       ContBlock = createBasicBlock("omp.precond.end");
3327       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3328                   getProfileCount(&S));
3329       EmitBlock(ThenBlock);
3330       incrementProfileCounter(&S);
3331     }
3332 
3333     emitAlignedClause(*this, S);
3334     // Emit 'then' code.
3335     {
3336       // Emit helper vars inits.
3337 
3338       LValue LB = EmitOMPHelperVar(
3339           *this, cast<DeclRefExpr>(
3340                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3341                           ? S.getCombinedLowerBoundVariable()
3342                           : S.getLowerBoundVariable())));
3343       LValue UB = EmitOMPHelperVar(
3344           *this, cast<DeclRefExpr>(
3345                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3346                           ? S.getCombinedUpperBoundVariable()
3347                           : S.getUpperBoundVariable())));
3348       LValue ST =
3349           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3350       LValue IL =
3351           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3352 
3353       OMPPrivateScope LoopScope(*this);
3354       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3355         // Emit implicit barrier to synchronize threads and avoid data races
3356         // on initialization of firstprivate variables and post-update of
3357         // lastprivate variables.
3358         CGM.getOpenMPRuntime().emitBarrierCall(
3359             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3360             /*ForceSimpleCall=*/true);
3361       }
3362       EmitOMPPrivateClause(S, LoopScope);
3363       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3364           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3365           !isOpenMPTeamsDirective(S.getDirectiveKind()))
3366         EmitOMPReductionClauseInit(S, LoopScope);
3367       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3368       EmitOMPPrivateLoopCounters(S, LoopScope);
3369       (void)LoopScope.Privatize();
3370       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3371         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3372 
3373       // Detect the distribute schedule kind and chunk.
3374       llvm::Value *Chunk = nullptr;
3375       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3376       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3377         ScheduleKind = C->getDistScheduleKind();
3378         if (const Expr *Ch = C->getChunkSize()) {
3379           Chunk = EmitScalarExpr(Ch);
3380           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3381                                        S.getIterationVariable()->getType(),
3382                                        S.getBeginLoc());
3383         }
3384       } else {
3385         // Default behaviour for dist_schedule clause.
3386         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
3387             *this, S, ScheduleKind, Chunk);
3388       }
3389       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3390       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3391 
3392       // OpenMP [2.10.8, distribute Construct, Description]
3393       // If dist_schedule is specified, kind must be static. If specified,
3394       // iterations are divided into chunks of size chunk_size, chunks are
3395       // assigned to the teams of the league in a round-robin fashion in the
3396       // order of the team number. When no chunk_size is specified, the
3397       // iteration space is divided into chunks that are approximately equal
3398       // in size, and at most one chunk is distributed to each team of the
3399       // league. The size of the chunks is unspecified in this case.
3400       bool StaticChunked = RT.isStaticChunked(
3401           ScheduleKind, /* Chunked */ Chunk != nullptr) &&
3402           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3403       if (RT.isStaticNonchunked(ScheduleKind,
3404                                 /* Chunked */ Chunk != nullptr) ||
3405           StaticChunked) {
3406         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3407           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3408         CGOpenMPRuntime::StaticRTInput StaticInit(
3409             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3410             LB.getAddress(), UB.getAddress(), ST.getAddress(),
3411             StaticChunked ? Chunk : nullptr);
3412         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
3413                                     StaticInit);
3414         JumpDest LoopExit =
3415             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3416         // UB = min(UB, GlobalUB);
3417         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3418                             ? S.getCombinedEnsureUpperBound()
3419                             : S.getEnsureUpperBound());
3420         // IV = LB;
3421         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3422                             ? S.getCombinedInit()
3423                             : S.getInit());
3424 
3425         const Expr *Cond =
3426             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3427                 ? S.getCombinedCond()
3428                 : S.getCond();
3429 
3430         if (StaticChunked)
3431           Cond = S.getCombinedDistCond();
3432 
3433         // For static unchunked schedules generate:
3434         //
3435         //  1. For distribute alone, codegen
3436         //    while (idx <= UB) {
3437         //      BODY;
3438         //      ++idx;
3439         //    }
3440         //
3441         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
3442         //    while (idx <= UB) {
3443         //      <CodeGen rest of pragma>(LB, UB);
3444         //      idx += ST;
3445         //    }
3446         //
3447         // For static chunk one schedule generate:
3448         //
3449         // while (IV <= GlobalUB) {
3450         //   <CodeGen rest of pragma>(LB, UB);
3451         //   LB += ST;
3452         //   UB += ST;
3453         //   UB = min(UB, GlobalUB);
3454         //   IV = LB;
3455         // }
3456         //
3457         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3458                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3459                            CodeGenLoop(CGF, S, LoopExit);
3460                          },
3461                          [&S, StaticChunked](CodeGenFunction &CGF) {
3462                            if (StaticChunked) {
3463                              CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
3464                              CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
3465                              CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
3466                              CGF.EmitIgnoredExpr(S.getCombinedInit());
3467                            }
3468                          });
3469         EmitBlock(LoopExit.getBlock());
3470         // Tell the runtime we are done.
3471         RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
3472       } else {
3473         // Emit the outer loop, which requests its work chunk [LB..UB] from
3474         // runtime and runs the inner loop to process it.
3475         const OMPLoopArguments LoopArguments = {
3476             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3477             Chunk};
3478         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3479                                    CodeGenLoop);
3480       }
3481       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3482         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3483           return CGF.Builder.CreateIsNotNull(
3484               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3485         });
3486       }
3487       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3488           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3489           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
3490         EmitOMPReductionClauseFinal(S, OMPD_simd);
3491         // Emit post-update of the reduction variables if IsLastIter != 0.
3492         emitPostUpdateForReductionClause(
3493             *this, S, [IL, &S](CodeGenFunction &CGF) {
3494               return CGF.Builder.CreateIsNotNull(
3495                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3496             });
3497       }
3498       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3499       if (HasLastprivateClause) {
3500         EmitOMPLastprivateClauseFinal(
3501             S, /*NoFinals=*/false,
3502             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3503       }
3504     }
3505 
3506     // We're now done with the loop, so jump to the continuation block.
3507     if (ContBlock) {
3508       EmitBranch(ContBlock);
3509       EmitBlock(ContBlock, true);
3510     }
3511   }
3512 }
3513 
3514 void CodeGenFunction::EmitOMPDistributeDirective(
3515     const OMPDistributeDirective &S) {
3516   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3517     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3518   };
3519   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3520   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3521 }
3522 
3523 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3524                                                    const CapturedStmt *S) {
3525   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3526   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3527   CGF.CapturedStmtInfo = &CapStmtInfo;
3528   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3529   Fn->setDoesNotRecurse();
3530   return Fn;
3531 }
3532 
3533 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3534   if (S.hasClausesOfKind<OMPDependClause>()) {
3535     assert(!S.getAssociatedStmt() &&
3536            "No associated statement must be in ordered depend construct.");
3537     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3538       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3539     return;
3540   }
3541   const auto *C = S.getSingleClause<OMPSIMDClause>();
3542   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3543                                  PrePostActionTy &Action) {
3544     const CapturedStmt *CS = S.getInnermostCapturedStmt();
3545     if (C) {
3546       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3547       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3548       llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3549       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
3550                                                       OutlinedFn, CapturedVars);
3551     } else {
3552       Action.Enter(CGF);
3553       CGF.EmitStmt(CS->getCapturedStmt());
3554     }
3555   };
3556   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3557   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
3558 }
3559 
3560 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3561                                          QualType SrcType, QualType DestType,
3562                                          SourceLocation Loc) {
3563   assert(CGF.hasScalarEvaluationKind(DestType) &&
3564          "DestType must have scalar evaluation kind.");
3565   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3566   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3567                                                    DestType, Loc)
3568                         : CGF.EmitComplexToScalarConversion(
3569                               Val.getComplexVal(), SrcType, DestType, Loc);
3570 }
3571 
3572 static CodeGenFunction::ComplexPairTy
3573 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3574                       QualType DestType, SourceLocation Loc) {
3575   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3576          "DestType must have complex evaluation kind.");
3577   CodeGenFunction::ComplexPairTy ComplexVal;
3578   if (Val.isScalar()) {
3579     // Convert the input element to the element type of the complex.
3580     QualType DestElementType =
3581         DestType->castAs<ComplexType>()->getElementType();
3582     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
3583         Val.getScalarVal(), SrcType, DestElementType, Loc);
3584     ComplexVal = CodeGenFunction::ComplexPairTy(
3585         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3586   } else {
3587     assert(Val.isComplex() && "Must be a scalar or complex.");
3588     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3589     QualType DestElementType =
3590         DestType->castAs<ComplexType>()->getElementType();
3591     ComplexVal.first = CGF.EmitScalarConversion(
3592         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3593     ComplexVal.second = CGF.EmitScalarConversion(
3594         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3595   }
3596   return ComplexVal;
3597 }
3598 
3599 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3600                                   LValue LVal, RValue RVal) {
3601   if (LVal.isGlobalReg()) {
3602     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3603   } else {
3604     CGF.EmitAtomicStore(RVal, LVal,
3605                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3606                                  : llvm::AtomicOrdering::Monotonic,
3607                         LVal.isVolatile(), /*isInit=*/false);
3608   }
3609 }
3610 
3611 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3612                                          QualType RValTy, SourceLocation Loc) {
3613   switch (getEvaluationKind(LVal.getType())) {
3614   case TEK_Scalar:
3615     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3616                                *this, RVal, RValTy, LVal.getType(), Loc)),
3617                            LVal);
3618     break;
3619   case TEK_Complex:
3620     EmitStoreOfComplex(
3621         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3622         /*isInit=*/false);
3623     break;
3624   case TEK_Aggregate:
3625     llvm_unreachable("Must be a scalar or complex.");
3626   }
3627 }
3628 
3629 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3630                                   const Expr *X, const Expr *V,
3631                                   SourceLocation Loc) {
3632   // v = x;
3633   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3634   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3635   LValue XLValue = CGF.EmitLValue(X);
3636   LValue VLValue = CGF.EmitLValue(V);
3637   RValue Res = XLValue.isGlobalReg()
3638                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3639                    : CGF.EmitAtomicLoad(
3640                          XLValue, Loc,
3641                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3642                                   : llvm::AtomicOrdering::Monotonic,
3643                          XLValue.isVolatile());
3644   // OpenMP, 2.12.6, atomic Construct
3645   // Any atomic construct with a seq_cst clause forces the atomically
3646   // performed operation to include an implicit flush operation without a
3647   // list.
3648   if (IsSeqCst)
3649     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3650   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3651 }
3652 
3653 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3654                                    const Expr *X, const Expr *E,
3655                                    SourceLocation Loc) {
3656   // x = expr;
3657   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3658   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3659   // OpenMP, 2.12.6, atomic Construct
3660   // Any atomic construct with a seq_cst clause forces the atomically
3661   // performed operation to include an implicit flush operation without a
3662   // list.
3663   if (IsSeqCst)
3664     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3665 }
3666 
3667 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3668                                                 RValue Update,
3669                                                 BinaryOperatorKind BO,
3670                                                 llvm::AtomicOrdering AO,
3671                                                 bool IsXLHSInRHSPart) {
3672   ASTContext &Context = CGF.getContext();
3673   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3674   // expression is simple and atomic is allowed for the given type for the
3675   // target platform.
3676   if (BO == BO_Comma || !Update.isScalar() ||
3677       !Update.getScalarVal()->getType()->isIntegerTy() ||
3678       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3679                         (Update.getScalarVal()->getType() !=
3680                          X.getAddress().getElementType())) ||
3681       !X.getAddress().getElementType()->isIntegerTy() ||
3682       !Context.getTargetInfo().hasBuiltinAtomic(
3683           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3684     return std::make_pair(false, RValue::get(nullptr));
3685 
3686   llvm::AtomicRMWInst::BinOp RMWOp;
3687   switch (BO) {
3688   case BO_Add:
3689     RMWOp = llvm::AtomicRMWInst::Add;
3690     break;
3691   case BO_Sub:
3692     if (!IsXLHSInRHSPart)
3693       return std::make_pair(false, RValue::get(nullptr));
3694     RMWOp = llvm::AtomicRMWInst::Sub;
3695     break;
3696   case BO_And:
3697     RMWOp = llvm::AtomicRMWInst::And;
3698     break;
3699   case BO_Or:
3700     RMWOp = llvm::AtomicRMWInst::Or;
3701     break;
3702   case BO_Xor:
3703     RMWOp = llvm::AtomicRMWInst::Xor;
3704     break;
3705   case BO_LT:
3706     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3707                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3708                                    : llvm::AtomicRMWInst::Max)
3709                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3710                                    : llvm::AtomicRMWInst::UMax);
3711     break;
3712   case BO_GT:
3713     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3714                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3715                                    : llvm::AtomicRMWInst::Min)
3716                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3717                                    : llvm::AtomicRMWInst::UMin);
3718     break;
3719   case BO_Assign:
3720     RMWOp = llvm::AtomicRMWInst::Xchg;
3721     break;
3722   case BO_Mul:
3723   case BO_Div:
3724   case BO_Rem:
3725   case BO_Shl:
3726   case BO_Shr:
3727   case BO_LAnd:
3728   case BO_LOr:
3729     return std::make_pair(false, RValue::get(nullptr));
3730   case BO_PtrMemD:
3731   case BO_PtrMemI:
3732   case BO_LE:
3733   case BO_GE:
3734   case BO_EQ:
3735   case BO_NE:
3736   case BO_Cmp:
3737   case BO_AddAssign:
3738   case BO_SubAssign:
3739   case BO_AndAssign:
3740   case BO_OrAssign:
3741   case BO_XorAssign:
3742   case BO_MulAssign:
3743   case BO_DivAssign:
3744   case BO_RemAssign:
3745   case BO_ShlAssign:
3746   case BO_ShrAssign:
3747   case BO_Comma:
3748     llvm_unreachable("Unsupported atomic update operation");
3749   }
3750   llvm::Value *UpdateVal = Update.getScalarVal();
3751   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3752     UpdateVal = CGF.Builder.CreateIntCast(
3753         IC, X.getAddress().getElementType(),
3754         X.getType()->hasSignedIntegerRepresentation());
3755   }
3756   llvm::Value *Res =
3757       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3758   return std::make_pair(true, RValue::get(Res));
3759 }
3760 
3761 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3762     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3763     llvm::AtomicOrdering AO, SourceLocation Loc,
3764     const llvm::function_ref<RValue(RValue)> CommonGen) {
3765   // Update expressions are allowed to have the following forms:
3766   // x binop= expr; -> xrval + expr;
3767   // x++, ++x -> xrval + 1;
3768   // x--, --x -> xrval - 1;
3769   // x = x binop expr; -> xrval binop expr
3770   // x = expr Op x; - > expr binop xrval;
3771   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3772   if (!Res.first) {
3773     if (X.isGlobalReg()) {
3774       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3775       // 'xrval'.
3776       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3777     } else {
3778       // Perform compare-and-swap procedure.
3779       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3780     }
3781   }
3782   return Res;
3783 }
3784 
3785 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3786                                     const Expr *X, const Expr *E,
3787                                     const Expr *UE, bool IsXLHSInRHSPart,
3788                                     SourceLocation Loc) {
3789   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3790          "Update expr in 'atomic update' must be a binary operator.");
3791   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3792   // Update expressions are allowed to have the following forms:
3793   // x binop= expr; -> xrval + expr;
3794   // x++, ++x -> xrval + 1;
3795   // x--, --x -> xrval - 1;
3796   // x = x binop expr; -> xrval binop expr
3797   // x = expr Op x; - > expr binop xrval;
3798   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3799   LValue XLValue = CGF.EmitLValue(X);
3800   RValue ExprRValue = CGF.EmitAnyExpr(E);
3801   llvm::AtomicOrdering AO = IsSeqCst
3802                                 ? llvm::AtomicOrdering::SequentiallyConsistent
3803                                 : llvm::AtomicOrdering::Monotonic;
3804   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3805   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3806   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3807   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3808   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
3809     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3810     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3811     return CGF.EmitAnyExpr(UE);
3812   };
3813   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3814       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3815   // OpenMP, 2.12.6, atomic Construct
3816   // Any atomic construct with a seq_cst clause forces the atomically
3817   // performed operation to include an implicit flush operation without a
3818   // list.
3819   if (IsSeqCst)
3820     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3821 }
3822 
3823 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3824                             QualType SourceType, QualType ResType,
3825                             SourceLocation Loc) {
3826   switch (CGF.getEvaluationKind(ResType)) {
3827   case TEK_Scalar:
3828     return RValue::get(
3829         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3830   case TEK_Complex: {
3831     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3832     return RValue::getComplex(Res.first, Res.second);
3833   }
3834   case TEK_Aggregate:
3835     break;
3836   }
3837   llvm_unreachable("Must be a scalar or complex.");
3838 }
3839 
3840 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3841                                      bool IsPostfixUpdate, const Expr *V,
3842                                      const Expr *X, const Expr *E,
3843                                      const Expr *UE, bool IsXLHSInRHSPart,
3844                                      SourceLocation Loc) {
3845   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3846   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3847   RValue NewVVal;
3848   LValue VLValue = CGF.EmitLValue(V);
3849   LValue XLValue = CGF.EmitLValue(X);
3850   RValue ExprRValue = CGF.EmitAnyExpr(E);
3851   llvm::AtomicOrdering AO = IsSeqCst
3852                                 ? llvm::AtomicOrdering::SequentiallyConsistent
3853                                 : llvm::AtomicOrdering::Monotonic;
3854   QualType NewVValType;
3855   if (UE) {
3856     // 'x' is updated with some additional value.
3857     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3858            "Update expr in 'atomic capture' must be a binary operator.");
3859     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3860     // Update expressions are allowed to have the following forms:
3861     // x binop= expr; -> xrval + expr;
3862     // x++, ++x -> xrval + 1;
3863     // x--, --x -> xrval - 1;
3864     // x = x binop expr; -> xrval binop expr
3865     // x = expr Op x; - > expr binop xrval;
3866     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3867     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3868     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3869     NewVValType = XRValExpr->getType();
3870     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3871     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3872                   IsPostfixUpdate](RValue XRValue) {
3873       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3874       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3875       RValue Res = CGF.EmitAnyExpr(UE);
3876       NewVVal = IsPostfixUpdate ? XRValue : Res;
3877       return Res;
3878     };
3879     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3880         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3881     if (Res.first) {
3882       // 'atomicrmw' instruction was generated.
3883       if (IsPostfixUpdate) {
3884         // Use old value from 'atomicrmw'.
3885         NewVVal = Res.second;
3886       } else {
3887         // 'atomicrmw' does not provide new value, so evaluate it using old
3888         // value of 'x'.
3889         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3890         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3891         NewVVal = CGF.EmitAnyExpr(UE);
3892       }
3893     }
3894   } else {
3895     // 'x' is simply rewritten with some 'expr'.
3896     NewVValType = X->getType().getNonReferenceType();
3897     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3898                                X->getType().getNonReferenceType(), Loc);
3899     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
3900       NewVVal = XRValue;
3901       return ExprRValue;
3902     };
3903     // Try to perform atomicrmw xchg, otherwise simple exchange.
3904     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3905         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3906         Loc, Gen);
3907     if (Res.first) {
3908       // 'atomicrmw' instruction was generated.
3909       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3910     }
3911   }
3912   // Emit post-update store to 'v' of old/new 'x' value.
3913   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3914   // OpenMP, 2.12.6, atomic Construct
3915   // Any atomic construct with a seq_cst clause forces the atomically
3916   // performed operation to include an implicit flush operation without a
3917   // list.
3918   if (IsSeqCst)
3919     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3920 }
3921 
3922 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3923                               bool IsSeqCst, bool IsPostfixUpdate,
3924                               const Expr *X, const Expr *V, const Expr *E,
3925                               const Expr *UE, bool IsXLHSInRHSPart,
3926                               SourceLocation Loc) {
3927   switch (Kind) {
3928   case OMPC_read:
3929     emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3930     break;
3931   case OMPC_write:
3932     emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3933     break;
3934   case OMPC_unknown:
3935   case OMPC_update:
3936     emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3937     break;
3938   case OMPC_capture:
3939     emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3940                              IsXLHSInRHSPart, Loc);
3941     break;
3942   case OMPC_if:
3943   case OMPC_final:
3944   case OMPC_num_threads:
3945   case OMPC_private:
3946   case OMPC_firstprivate:
3947   case OMPC_lastprivate:
3948   case OMPC_reduction:
3949   case OMPC_task_reduction:
3950   case OMPC_in_reduction:
3951   case OMPC_safelen:
3952   case OMPC_simdlen:
3953   case OMPC_allocator:
3954   case OMPC_allocate:
3955   case OMPC_collapse:
3956   case OMPC_default:
3957   case OMPC_seq_cst:
3958   case OMPC_shared:
3959   case OMPC_linear:
3960   case OMPC_aligned:
3961   case OMPC_copyin:
3962   case OMPC_copyprivate:
3963   case OMPC_flush:
3964   case OMPC_proc_bind:
3965   case OMPC_schedule:
3966   case OMPC_ordered:
3967   case OMPC_nowait:
3968   case OMPC_untied:
3969   case OMPC_threadprivate:
3970   case OMPC_depend:
3971   case OMPC_mergeable:
3972   case OMPC_device:
3973   case OMPC_threads:
3974   case OMPC_simd:
3975   case OMPC_map:
3976   case OMPC_num_teams:
3977   case OMPC_thread_limit:
3978   case OMPC_priority:
3979   case OMPC_grainsize:
3980   case OMPC_nogroup:
3981   case OMPC_num_tasks:
3982   case OMPC_hint:
3983   case OMPC_dist_schedule:
3984   case OMPC_defaultmap:
3985   case OMPC_uniform:
3986   case OMPC_to:
3987   case OMPC_from:
3988   case OMPC_use_device_ptr:
3989   case OMPC_is_device_ptr:
3990   case OMPC_unified_address:
3991   case OMPC_unified_shared_memory:
3992   case OMPC_reverse_offload:
3993   case OMPC_dynamic_allocators:
3994   case OMPC_atomic_default_mem_order:
3995     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3996   }
3997 }
3998 
3999 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
4000   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
4001   OpenMPClauseKind Kind = OMPC_unknown;
4002   for (const OMPClause *C : S.clauses()) {
4003     // Find first clause (skip seq_cst clause, if it is first).
4004     if (C->getClauseKind() != OMPC_seq_cst) {
4005       Kind = C->getClauseKind();
4006       break;
4007     }
4008   }
4009 
4010   const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
4011   if (const auto *FE = dyn_cast<FullExpr>(CS))
4012     enterFullExpression(FE);
4013   // Processing for statements under 'atomic capture'.
4014   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
4015     for (const Stmt *C : Compound->body()) {
4016       if (const auto *FE = dyn_cast<FullExpr>(C))
4017         enterFullExpression(FE);
4018     }
4019   }
4020 
4021   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
4022                                             PrePostActionTy &) {
4023     CGF.EmitStopPoint(CS);
4024     emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
4025                       S.getV(), S.getExpr(), S.getUpdateExpr(),
4026                       S.isXLHSInRHSPart(), S.getBeginLoc());
4027   };
4028   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4029   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
4030 }
4031 
4032 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
4033                                          const OMPExecutableDirective &S,
4034                                          const RegionCodeGenTy &CodeGen) {
4035   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
4036   CodeGenModule &CGM = CGF.CGM;
4037 
4038   // On device emit this construct as inlined code.
4039   if (CGM.getLangOpts().OpenMPIsDevice) {
4040     OMPLexicalScope Scope(CGF, S, OMPD_target);
4041     CGM.getOpenMPRuntime().emitInlinedDirective(
4042         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4043           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4044         });
4045     return;
4046   }
4047 
4048   llvm::Function *Fn = nullptr;
4049   llvm::Constant *FnID = nullptr;
4050 
4051   const Expr *IfCond = nullptr;
4052   // Check for the at most one if clause associated with the target region.
4053   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4054     if (C->getNameModifier() == OMPD_unknown ||
4055         C->getNameModifier() == OMPD_target) {
4056       IfCond = C->getCondition();
4057       break;
4058     }
4059   }
4060 
4061   // Check if we have any device clause associated with the directive.
4062   const Expr *Device = nullptr;
4063   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4064     Device = C->getDevice();
4065 
4066   // Check if we have an if clause whose conditional always evaluates to false
4067   // or if we do not have any targets specified. If so the target region is not
4068   // an offload entry point.
4069   bool IsOffloadEntry = true;
4070   if (IfCond) {
4071     bool Val;
4072     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
4073       IsOffloadEntry = false;
4074   }
4075   if (CGM.getLangOpts().OMPTargetTriples.empty())
4076     IsOffloadEntry = false;
4077 
4078   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
4079   StringRef ParentName;
4080   // In case we have Ctors/Dtors we use the complete type variant to produce
4081   // the mangling of the device outlined kernel.
4082   if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
4083     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
4084   else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
4085     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
4086   else
4087     ParentName =
4088         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
4089 
4090   // Emit target region as a standalone region.
4091   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
4092                                                     IsOffloadEntry, CodeGen);
4093   OMPLexicalScope Scope(CGF, S, OMPD_task);
4094   auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
4095     OMPLoopScope(CGF, D);
4096     // Emit calculation of the iterations count.
4097     llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
4098     NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
4099                                               /*isSigned=*/false);
4100     return NumIterations;
4101   };
4102   if (IsOffloadEntry)
4103     CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
4104                                                        SizeEmitter);
4105   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
4106 }
4107 
4108 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
4109                              PrePostActionTy &Action) {
4110   Action.Enter(CGF);
4111   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4112   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4113   CGF.EmitOMPPrivateClause(S, PrivateScope);
4114   (void)PrivateScope.Privatize();
4115   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4116     CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4117 
4118   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
4119 }
4120 
4121 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
4122                                                   StringRef ParentName,
4123                                                   const OMPTargetDirective &S) {
4124   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4125     emitTargetRegion(CGF, S, Action);
4126   };
4127   llvm::Function *Fn;
4128   llvm::Constant *Addr;
4129   // Emit target region as a standalone region.
4130   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4131       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4132   assert(Fn && Addr && "Target device function emission failed.");
4133 }
4134 
4135 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
4136   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4137     emitTargetRegion(CGF, S, Action);
4138   };
4139   emitCommonOMPTargetDirective(*this, S, CodeGen);
4140 }
4141 
4142 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
4143                                         const OMPExecutableDirective &S,
4144                                         OpenMPDirectiveKind InnermostKind,
4145                                         const RegionCodeGenTy &CodeGen) {
4146   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
4147   llvm::Function *OutlinedFn =
4148       CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
4149           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
4150 
4151   const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
4152   const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
4153   if (NT || TL) {
4154     const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
4155     const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
4156 
4157     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
4158                                                   S.getBeginLoc());
4159   }
4160 
4161   OMPTeamsScope Scope(CGF, S);
4162   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4163   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4164   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
4165                                            CapturedVars);
4166 }
4167 
4168 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
4169   // Emit teams region as a standalone region.
4170   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4171     Action.Enter(CGF);
4172     OMPPrivateScope PrivateScope(CGF);
4173     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4174     CGF.EmitOMPPrivateClause(S, PrivateScope);
4175     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4176     (void)PrivateScope.Privatize();
4177     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
4178     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4179   };
4180   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4181   emitPostUpdateForReductionClause(*this, S,
4182                                    [](CodeGenFunction &) { return nullptr; });
4183 }
4184 
4185 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4186                                   const OMPTargetTeamsDirective &S) {
4187   auto *CS = S.getCapturedStmt(OMPD_teams);
4188   Action.Enter(CGF);
4189   // Emit teams region as a standalone region.
4190   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4191     Action.Enter(CGF);
4192     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4193     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4194     CGF.EmitOMPPrivateClause(S, PrivateScope);
4195     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4196     (void)PrivateScope.Privatize();
4197     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4198       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4199     CGF.EmitStmt(CS->getCapturedStmt());
4200     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4201   };
4202   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
4203   emitPostUpdateForReductionClause(CGF, S,
4204                                    [](CodeGenFunction &) { return nullptr; });
4205 }
4206 
4207 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
4208     CodeGenModule &CGM, StringRef ParentName,
4209     const OMPTargetTeamsDirective &S) {
4210   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4211     emitTargetTeamsRegion(CGF, Action, S);
4212   };
4213   llvm::Function *Fn;
4214   llvm::Constant *Addr;
4215   // Emit target region as a standalone region.
4216   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4217       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4218   assert(Fn && Addr && "Target device function emission failed.");
4219 }
4220 
4221 void CodeGenFunction::EmitOMPTargetTeamsDirective(
4222     const OMPTargetTeamsDirective &S) {
4223   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4224     emitTargetTeamsRegion(CGF, Action, S);
4225   };
4226   emitCommonOMPTargetDirective(*this, S, CodeGen);
4227 }
4228 
4229 static void
4230 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4231                                 const OMPTargetTeamsDistributeDirective &S) {
4232   Action.Enter(CGF);
4233   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4234     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4235   };
4236 
4237   // Emit teams region as a standalone region.
4238   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4239                                             PrePostActionTy &Action) {
4240     Action.Enter(CGF);
4241     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4242     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4243     (void)PrivateScope.Privatize();
4244     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4245                                                     CodeGenDistribute);
4246     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4247   };
4248   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
4249   emitPostUpdateForReductionClause(CGF, S,
4250                                    [](CodeGenFunction &) { return nullptr; });
4251 }
4252 
4253 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
4254     CodeGenModule &CGM, StringRef ParentName,
4255     const OMPTargetTeamsDistributeDirective &S) {
4256   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4257     emitTargetTeamsDistributeRegion(CGF, Action, S);
4258   };
4259   llvm::Function *Fn;
4260   llvm::Constant *Addr;
4261   // Emit target region as a standalone region.
4262   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4263       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4264   assert(Fn && Addr && "Target device function emission failed.");
4265 }
4266 
4267 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
4268     const OMPTargetTeamsDistributeDirective &S) {
4269   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4270     emitTargetTeamsDistributeRegion(CGF, Action, S);
4271   };
4272   emitCommonOMPTargetDirective(*this, S, CodeGen);
4273 }
4274 
4275 static void emitTargetTeamsDistributeSimdRegion(
4276     CodeGenFunction &CGF, PrePostActionTy &Action,
4277     const OMPTargetTeamsDistributeSimdDirective &S) {
4278   Action.Enter(CGF);
4279   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4280     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4281   };
4282 
4283   // Emit teams region as a standalone region.
4284   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4285                                             PrePostActionTy &Action) {
4286     Action.Enter(CGF);
4287     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4288     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4289     (void)PrivateScope.Privatize();
4290     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4291                                                     CodeGenDistribute);
4292     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4293   };
4294   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
4295   emitPostUpdateForReductionClause(CGF, S,
4296                                    [](CodeGenFunction &) { return nullptr; });
4297 }
4298 
4299 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
4300     CodeGenModule &CGM, StringRef ParentName,
4301     const OMPTargetTeamsDistributeSimdDirective &S) {
4302   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4303     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4304   };
4305   llvm::Function *Fn;
4306   llvm::Constant *Addr;
4307   // Emit target region as a standalone region.
4308   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4309       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4310   assert(Fn && Addr && "Target device function emission failed.");
4311 }
4312 
4313 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
4314     const OMPTargetTeamsDistributeSimdDirective &S) {
4315   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4316     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4317   };
4318   emitCommonOMPTargetDirective(*this, S, CodeGen);
4319 }
4320 
4321 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
4322     const OMPTeamsDistributeDirective &S) {
4323 
4324   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4325     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4326   };
4327 
4328   // Emit teams region as a standalone region.
4329   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4330                                             PrePostActionTy &Action) {
4331     Action.Enter(CGF);
4332     OMPPrivateScope PrivateScope(CGF);
4333     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4334     (void)PrivateScope.Privatize();
4335     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4336                                                     CodeGenDistribute);
4337     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4338   };
4339   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4340   emitPostUpdateForReductionClause(*this, S,
4341                                    [](CodeGenFunction &) { return nullptr; });
4342 }
4343 
4344 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
4345     const OMPTeamsDistributeSimdDirective &S) {
4346   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4347     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4348   };
4349 
4350   // Emit teams region as a standalone region.
4351   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4352                                             PrePostActionTy &Action) {
4353     Action.Enter(CGF);
4354     OMPPrivateScope PrivateScope(CGF);
4355     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4356     (void)PrivateScope.Privatize();
4357     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
4358                                                     CodeGenDistribute);
4359     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4360   };
4361   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
4362   emitPostUpdateForReductionClause(*this, S,
4363                                    [](CodeGenFunction &) { return nullptr; });
4364 }
4365 
4366 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
4367     const OMPTeamsDistributeParallelForDirective &S) {
4368   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4369     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4370                               S.getDistInc());
4371   };
4372 
4373   // Emit teams region as a standalone region.
4374   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4375                                             PrePostActionTy &Action) {
4376     Action.Enter(CGF);
4377     OMPPrivateScope PrivateScope(CGF);
4378     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4379     (void)PrivateScope.Privatize();
4380     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4381                                                     CodeGenDistribute);
4382     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4383   };
4384   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4385   emitPostUpdateForReductionClause(*this, S,
4386                                    [](CodeGenFunction &) { return nullptr; });
4387 }
4388 
4389 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
4390     const OMPTeamsDistributeParallelForSimdDirective &S) {
4391   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4392     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4393                               S.getDistInc());
4394   };
4395 
4396   // Emit teams region as a standalone region.
4397   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4398                                             PrePostActionTy &Action) {
4399     Action.Enter(CGF);
4400     OMPPrivateScope PrivateScope(CGF);
4401     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4402     (void)PrivateScope.Privatize();
4403     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4404         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4405     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4406   };
4407   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4408   emitPostUpdateForReductionClause(*this, S,
4409                                    [](CodeGenFunction &) { return nullptr; });
4410 }
4411 
4412 static void emitTargetTeamsDistributeParallelForRegion(
4413     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
4414     PrePostActionTy &Action) {
4415   Action.Enter(CGF);
4416   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4417     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4418                               S.getDistInc());
4419   };
4420 
4421   // Emit teams region as a standalone region.
4422   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4423                                                  PrePostActionTy &Action) {
4424     Action.Enter(CGF);
4425     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4426     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4427     (void)PrivateScope.Privatize();
4428     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4429         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4430     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4431   };
4432 
4433   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
4434                               CodeGenTeams);
4435   emitPostUpdateForReductionClause(CGF, S,
4436                                    [](CodeGenFunction &) { return nullptr; });
4437 }
4438 
4439 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
4440     CodeGenModule &CGM, StringRef ParentName,
4441     const OMPTargetTeamsDistributeParallelForDirective &S) {
4442   // Emit SPMD target teams distribute parallel for region as a standalone
4443   // region.
4444   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4445     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4446   };
4447   llvm::Function *Fn;
4448   llvm::Constant *Addr;
4449   // Emit target region as a standalone region.
4450   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4451       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4452   assert(Fn && Addr && "Target device function emission failed.");
4453 }
4454 
4455 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
4456     const OMPTargetTeamsDistributeParallelForDirective &S) {
4457   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4458     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4459   };
4460   emitCommonOMPTargetDirective(*this, S, CodeGen);
4461 }
4462 
4463 static void emitTargetTeamsDistributeParallelForSimdRegion(
4464     CodeGenFunction &CGF,
4465     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
4466     PrePostActionTy &Action) {
4467   Action.Enter(CGF);
4468   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4469     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4470                               S.getDistInc());
4471   };
4472 
4473   // Emit teams region as a standalone region.
4474   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4475                                                  PrePostActionTy &Action) {
4476     Action.Enter(CGF);
4477     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4478     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4479     (void)PrivateScope.Privatize();
4480     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4481         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4482     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4483   };
4484 
4485   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
4486                               CodeGenTeams);
4487   emitPostUpdateForReductionClause(CGF, S,
4488                                    [](CodeGenFunction &) { return nullptr; });
4489 }
4490 
4491 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
4492     CodeGenModule &CGM, StringRef ParentName,
4493     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4494   // Emit SPMD target teams distribute parallel for simd region as a standalone
4495   // region.
4496   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4497     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4498   };
4499   llvm::Function *Fn;
4500   llvm::Constant *Addr;
4501   // Emit target region as a standalone region.
4502   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4503       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4504   assert(Fn && Addr && "Target device function emission failed.");
4505 }
4506 
4507 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
4508     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4509   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4510     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4511   };
4512   emitCommonOMPTargetDirective(*this, S, CodeGen);
4513 }
4514 
4515 void CodeGenFunction::EmitOMPCancellationPointDirective(
4516     const OMPCancellationPointDirective &S) {
4517   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
4518                                                    S.getCancelRegion());
4519 }
4520 
4521 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
4522   const Expr *IfCond = nullptr;
4523   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4524     if (C->getNameModifier() == OMPD_unknown ||
4525         C->getNameModifier() == OMPD_cancel) {
4526       IfCond = C->getCondition();
4527       break;
4528     }
4529   }
4530   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
4531                                         S.getCancelRegion());
4532 }
4533 
4534 CodeGenFunction::JumpDest
4535 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
4536   if (Kind == OMPD_parallel || Kind == OMPD_task ||
4537       Kind == OMPD_target_parallel)
4538     return ReturnBlock;
4539   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
4540          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
4541          Kind == OMPD_distribute_parallel_for ||
4542          Kind == OMPD_target_parallel_for ||
4543          Kind == OMPD_teams_distribute_parallel_for ||
4544          Kind == OMPD_target_teams_distribute_parallel_for);
4545   return OMPCancelStack.getExitBlock();
4546 }
4547 
4548 void CodeGenFunction::EmitOMPUseDevicePtrClause(
4549     const OMPClause &NC, OMPPrivateScope &PrivateScope,
4550     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
4551   const auto &C = cast<OMPUseDevicePtrClause>(NC);
4552   auto OrigVarIt = C.varlist_begin();
4553   auto InitIt = C.inits().begin();
4554   for (const Expr *PvtVarIt : C.private_copies()) {
4555     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
4556     const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
4557     const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
4558 
4559     // In order to identify the right initializer we need to match the
4560     // declaration used by the mapping logic. In some cases we may get
4561     // OMPCapturedExprDecl that refers to the original declaration.
4562     const ValueDecl *MatchingVD = OrigVD;
4563     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4564       // OMPCapturedExprDecl are used to privative fields of the current
4565       // structure.
4566       const auto *ME = cast<MemberExpr>(OED->getInit());
4567       assert(isa<CXXThisExpr>(ME->getBase()) &&
4568              "Base should be the current struct!");
4569       MatchingVD = ME->getMemberDecl();
4570     }
4571 
4572     // If we don't have information about the current list item, move on to
4573     // the next one.
4574     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4575     if (InitAddrIt == CaptureDeviceAddrMap.end())
4576       continue;
4577 
4578     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
4579                                                          InitAddrIt, InitVD,
4580                                                          PvtVD]() {
4581       // Initialize the temporary initialization variable with the address we
4582       // get from the runtime library. We have to cast the source address
4583       // because it is always a void *. References are materialized in the
4584       // privatization scope, so the initialization here disregards the fact
4585       // the original variable is a reference.
4586       QualType AddrQTy =
4587           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4588       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4589       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4590       setAddrOfLocalVar(InitVD, InitAddr);
4591 
4592       // Emit private declaration, it will be initialized by the value we
4593       // declaration we just added to the local declarations map.
4594       EmitDecl(*PvtVD);
4595 
4596       // The initialization variables reached its purpose in the emission
4597       // of the previous declaration, so we don't need it anymore.
4598       LocalDeclMap.erase(InitVD);
4599 
4600       // Return the address of the private variable.
4601       return GetAddrOfLocalVar(PvtVD);
4602     });
4603     assert(IsRegistered && "firstprivate var already registered as private");
4604     // Silence the warning about unused variable.
4605     (void)IsRegistered;
4606 
4607     ++OrigVarIt;
4608     ++InitIt;
4609   }
4610 }
4611 
4612 // Generate the instructions for '#pragma omp target data' directive.
4613 void CodeGenFunction::EmitOMPTargetDataDirective(
4614     const OMPTargetDataDirective &S) {
4615   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
4616 
4617   // Create a pre/post action to signal the privatization of the device pointer.
4618   // This action can be replaced by the OpenMP runtime code generation to
4619   // deactivate privatization.
4620   bool PrivatizeDevicePointers = false;
4621   class DevicePointerPrivActionTy : public PrePostActionTy {
4622     bool &PrivatizeDevicePointers;
4623 
4624   public:
4625     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
4626         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
4627     void Enter(CodeGenFunction &CGF) override {
4628       PrivatizeDevicePointers = true;
4629     }
4630   };
4631   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
4632 
4633   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
4634                        CodeGenFunction &CGF, PrePostActionTy &Action) {
4635     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4636       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4637     };
4638 
4639     // Codegen that selects whether to generate the privatization code or not.
4640     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
4641                           &InnermostCodeGen](CodeGenFunction &CGF,
4642                                              PrePostActionTy &Action) {
4643       RegionCodeGenTy RCG(InnermostCodeGen);
4644       PrivatizeDevicePointers = false;
4645 
4646       // Call the pre-action to change the status of PrivatizeDevicePointers if
4647       // needed.
4648       Action.Enter(CGF);
4649 
4650       if (PrivatizeDevicePointers) {
4651         OMPPrivateScope PrivateScope(CGF);
4652         // Emit all instances of the use_device_ptr clause.
4653         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4654           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4655                                         Info.CaptureDeviceAddrMap);
4656         (void)PrivateScope.Privatize();
4657         RCG(CGF);
4658       } else {
4659         RCG(CGF);
4660       }
4661     };
4662 
4663     // Forward the provided action to the privatization codegen.
4664     RegionCodeGenTy PrivRCG(PrivCodeGen);
4665     PrivRCG.setAction(Action);
4666 
4667     // Notwithstanding the body of the region is emitted as inlined directive,
4668     // we don't use an inline scope as changes in the references inside the
4669     // region are expected to be visible outside, so we do not privative them.
4670     OMPLexicalScope Scope(CGF, S);
4671     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4672                                                     PrivRCG);
4673   };
4674 
4675   RegionCodeGenTy RCG(CodeGen);
4676 
4677   // If we don't have target devices, don't bother emitting the data mapping
4678   // code.
4679   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4680     RCG(*this);
4681     return;
4682   }
4683 
4684   // Check if we have any if clause associated with the directive.
4685   const Expr *IfCond = nullptr;
4686   if (const auto *C = S.getSingleClause<OMPIfClause>())
4687     IfCond = C->getCondition();
4688 
4689   // Check if we have any device clause associated with the directive.
4690   const Expr *Device = nullptr;
4691   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
4692     Device = C->getDevice();
4693 
4694   // Set the action to signal privatization of device pointers.
4695   RCG.setAction(PrivAction);
4696 
4697   // Emit region code.
4698   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4699                                              Info);
4700 }
4701 
4702 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4703     const OMPTargetEnterDataDirective &S) {
4704   // If we don't have target devices, don't bother emitting the data mapping
4705   // code.
4706   if (CGM.getLangOpts().OMPTargetTriples.empty())
4707     return;
4708 
4709   // Check if we have any if clause associated with the directive.
4710   const Expr *IfCond = nullptr;
4711   if (const auto *C = S.getSingleClause<OMPIfClause>())
4712     IfCond = C->getCondition();
4713 
4714   // Check if we have any device clause associated with the directive.
4715   const Expr *Device = nullptr;
4716   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
4717     Device = C->getDevice();
4718 
4719   OMPLexicalScope Scope(*this, S, OMPD_task);
4720   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4721 }
4722 
4723 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4724     const OMPTargetExitDataDirective &S) {
4725   // If we don't have target devices, don't bother emitting the data mapping
4726   // code.
4727   if (CGM.getLangOpts().OMPTargetTriples.empty())
4728     return;
4729 
4730   // Check if we have any if clause associated with the directive.
4731   const Expr *IfCond = nullptr;
4732   if (const auto *C = S.getSingleClause<OMPIfClause>())
4733     IfCond = C->getCondition();
4734 
4735   // Check if we have any device clause associated with the directive.
4736   const Expr *Device = nullptr;
4737   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
4738     Device = C->getDevice();
4739 
4740   OMPLexicalScope Scope(*this, S, OMPD_task);
4741   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4742 }
4743 
4744 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4745                                      const OMPTargetParallelDirective &S,
4746                                      PrePostActionTy &Action) {
4747   // Get the captured statement associated with the 'parallel' region.
4748   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
4749   Action.Enter(CGF);
4750   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4751     Action.Enter(CGF);
4752     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4753     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4754     CGF.EmitOMPPrivateClause(S, PrivateScope);
4755     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4756     (void)PrivateScope.Privatize();
4757     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4758       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4759     // TODO: Add support for clauses.
4760     CGF.EmitStmt(CS->getCapturedStmt());
4761     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4762   };
4763   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4764                                  emitEmptyBoundParameters);
4765   emitPostUpdateForReductionClause(CGF, S,
4766                                    [](CodeGenFunction &) { return nullptr; });
4767 }
4768 
4769 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4770     CodeGenModule &CGM, StringRef ParentName,
4771     const OMPTargetParallelDirective &S) {
4772   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4773     emitTargetParallelRegion(CGF, S, Action);
4774   };
4775   llvm::Function *Fn;
4776   llvm::Constant *Addr;
4777   // Emit target region as a standalone region.
4778   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4779       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4780   assert(Fn && Addr && "Target device function emission failed.");
4781 }
4782 
4783 void CodeGenFunction::EmitOMPTargetParallelDirective(
4784     const OMPTargetParallelDirective &S) {
4785   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4786     emitTargetParallelRegion(CGF, S, Action);
4787   };
4788   emitCommonOMPTargetDirective(*this, S, CodeGen);
4789 }
4790 
4791 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4792                                         const OMPTargetParallelForDirective &S,
4793                                         PrePostActionTy &Action) {
4794   Action.Enter(CGF);
4795   // Emit directive as a combined directive that consists of two implicit
4796   // directives: 'parallel' with 'for' directive.
4797   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4798     Action.Enter(CGF);
4799     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4800         CGF, OMPD_target_parallel_for, S.hasCancel());
4801     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4802                                emitDispatchForLoopBounds);
4803   };
4804   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4805                                  emitEmptyBoundParameters);
4806 }
4807 
4808 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4809     CodeGenModule &CGM, StringRef ParentName,
4810     const OMPTargetParallelForDirective &S) {
4811   // Emit SPMD target parallel for region as a standalone region.
4812   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4813     emitTargetParallelForRegion(CGF, S, Action);
4814   };
4815   llvm::Function *Fn;
4816   llvm::Constant *Addr;
4817   // Emit target region as a standalone region.
4818   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4819       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4820   assert(Fn && Addr && "Target device function emission failed.");
4821 }
4822 
4823 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4824     const OMPTargetParallelForDirective &S) {
4825   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4826     emitTargetParallelForRegion(CGF, S, Action);
4827   };
4828   emitCommonOMPTargetDirective(*this, S, CodeGen);
4829 }
4830 
4831 static void
4832 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4833                                 const OMPTargetParallelForSimdDirective &S,
4834                                 PrePostActionTy &Action) {
4835   Action.Enter(CGF);
4836   // Emit directive as a combined directive that consists of two implicit
4837   // directives: 'parallel' with 'for' directive.
4838   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4839     Action.Enter(CGF);
4840     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4841                                emitDispatchForLoopBounds);
4842   };
4843   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4844                                  emitEmptyBoundParameters);
4845 }
4846 
4847 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4848     CodeGenModule &CGM, StringRef ParentName,
4849     const OMPTargetParallelForSimdDirective &S) {
4850   // Emit SPMD target parallel for region as a standalone region.
4851   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4852     emitTargetParallelForSimdRegion(CGF, S, Action);
4853   };
4854   llvm::Function *Fn;
4855   llvm::Constant *Addr;
4856   // Emit target region as a standalone region.
4857   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4858       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4859   assert(Fn && Addr && "Target device function emission failed.");
4860 }
4861 
4862 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4863     const OMPTargetParallelForSimdDirective &S) {
4864   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4865     emitTargetParallelForSimdRegion(CGF, S, Action);
4866   };
4867   emitCommonOMPTargetDirective(*this, S, CodeGen);
4868 }
4869 
4870 /// Emit a helper variable and return corresponding lvalue.
4871 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4872                      const ImplicitParamDecl *PVD,
4873                      CodeGenFunction::OMPPrivateScope &Privates) {
4874   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
4875   Privates.addPrivate(VDecl,
4876                       [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
4877 }
4878 
4879 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4880   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4881   // Emit outlined function for task construct.
4882   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
4883   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4884   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4885   const Expr *IfCond = nullptr;
4886   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4887     if (C->getNameModifier() == OMPD_unknown ||
4888         C->getNameModifier() == OMPD_taskloop) {
4889       IfCond = C->getCondition();
4890       break;
4891     }
4892   }
4893 
4894   OMPTaskDataTy Data;
4895   // Check if taskloop must be emitted without taskgroup.
4896   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4897   // TODO: Check if we should emit tied or untied task.
4898   Data.Tied = true;
4899   // Set scheduling for taskloop
4900   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4901     // grainsize clause
4902     Data.Schedule.setInt(/*IntVal=*/false);
4903     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4904   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4905     // num_tasks clause
4906     Data.Schedule.setInt(/*IntVal=*/true);
4907     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4908   }
4909 
4910   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4911     // if (PreCond) {
4912     //   for (IV in 0..LastIteration) BODY;
4913     //   <Final counter/linear vars updates>;
4914     // }
4915     //
4916 
4917     // Emit: if (PreCond) - begin.
4918     // If the condition constant folds and can be elided, avoid emitting the
4919     // whole loop.
4920     bool CondConstant;
4921     llvm::BasicBlock *ContBlock = nullptr;
4922     OMPLoopScope PreInitScope(CGF, S);
4923     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4924       if (!CondConstant)
4925         return;
4926     } else {
4927       llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4928       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4929       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4930                   CGF.getProfileCount(&S));
4931       CGF.EmitBlock(ThenBlock);
4932       CGF.incrementProfileCounter(&S);
4933     }
4934 
4935     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4936       CGF.EmitOMPSimdInit(S);
4937 
4938     OMPPrivateScope LoopScope(CGF);
4939     // Emit helper vars inits.
4940     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4941     auto *I = CS->getCapturedDecl()->param_begin();
4942     auto *LBP = std::next(I, LowerBound);
4943     auto *UBP = std::next(I, UpperBound);
4944     auto *STP = std::next(I, Stride);
4945     auto *LIP = std::next(I, LastIter);
4946     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4947              LoopScope);
4948     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4949              LoopScope);
4950     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4951     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4952              LoopScope);
4953     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4954     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4955     (void)LoopScope.Privatize();
4956     // Emit the loop iteration variable.
4957     const Expr *IVExpr = S.getIterationVariable();
4958     const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4959     CGF.EmitVarDecl(*IVDecl);
4960     CGF.EmitIgnoredExpr(S.getInit());
4961 
4962     // Emit the iterations count variable.
4963     // If it is not a variable, Sema decided to calculate iterations count on
4964     // each iteration (e.g., it is foldable into a constant).
4965     if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4966       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4967       // Emit calculation of the iterations count.
4968       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4969     }
4970 
4971     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4972                          S.getInc(),
4973                          [&S](CodeGenFunction &CGF) {
4974                            CGF.EmitOMPLoopBody(S, JumpDest());
4975                            CGF.EmitStopPoint(&S);
4976                          },
4977                          [](CodeGenFunction &) {});
4978     // Emit: if (PreCond) - end.
4979     if (ContBlock) {
4980       CGF.EmitBranch(ContBlock);
4981       CGF.EmitBlock(ContBlock, true);
4982     }
4983     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4984     if (HasLastprivateClause) {
4985       CGF.EmitOMPLastprivateClauseFinal(
4986           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4987           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4988               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4989               (*LIP)->getType(), S.getBeginLoc())));
4990     }
4991   };
4992   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4993                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4994                             const OMPTaskDataTy &Data) {
4995     auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
4996                       &Data](CodeGenFunction &CGF, PrePostActionTy &) {
4997       OMPLoopScope PreInitScope(CGF, S);
4998       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
4999                                                   OutlinedFn, SharedsTy,
5000                                                   CapturedStruct, IfCond, Data);
5001     };
5002     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
5003                                                     CodeGen);
5004   };
5005   if (Data.Nogroup) {
5006     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
5007   } else {
5008     CGM.getOpenMPRuntime().emitTaskgroupRegion(
5009         *this,
5010         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
5011                                         PrePostActionTy &Action) {
5012           Action.Enter(CGF);
5013           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
5014                                         Data);
5015         },
5016         S.getBeginLoc());
5017   }
5018 }
5019 
5020 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
5021   EmitOMPTaskLoopBasedDirective(S);
5022 }
5023 
5024 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
5025     const OMPTaskLoopSimdDirective &S) {
5026   EmitOMPTaskLoopBasedDirective(S);
5027 }
5028 
5029 // Generate the instructions for '#pragma omp target update' directive.
5030 void CodeGenFunction::EmitOMPTargetUpdateDirective(
5031     const OMPTargetUpdateDirective &S) {
5032   // If we don't have target devices, don't bother emitting the data mapping
5033   // code.
5034   if (CGM.getLangOpts().OMPTargetTriples.empty())
5035     return;
5036 
5037   // Check if we have any if clause associated with the directive.
5038   const Expr *IfCond = nullptr;
5039   if (const auto *C = S.getSingleClause<OMPIfClause>())
5040     IfCond = C->getCondition();
5041 
5042   // Check if we have any device clause associated with the directive.
5043   const Expr *Device = nullptr;
5044   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
5045     Device = C->getDevice();
5046 
5047   OMPLexicalScope Scope(*this, S, OMPD_task);
5048   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
5049 }
5050 
5051 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
5052     const OMPExecutableDirective &D) {
5053   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
5054     return;
5055   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
5056     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
5057       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
5058     } else {
5059       OMPPrivateScope LoopGlobals(CGF);
5060       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
5061         for (const Expr *E : LD->counters()) {
5062           const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5063           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
5064             LValue GlobLVal = CGF.EmitLValue(E);
5065             LoopGlobals.addPrivate(
5066                 VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
5067           }
5068           if (isa<OMPCapturedExprDecl>(VD)) {
5069             // Emit only those that were not explicitly referenced in clauses.
5070             if (!CGF.LocalDeclMap.count(VD))
5071               CGF.EmitVarDecl(*VD);
5072           }
5073         }
5074         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
5075           if (!C->getNumForLoops())
5076             continue;
5077           for (unsigned I = LD->getCollapsedNumber(),
5078                         E = C->getLoopNumIterations().size();
5079                I < E; ++I) {
5080             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
5081                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
5082               // Emit only those that were not explicitly referenced in clauses.
5083               if (!CGF.LocalDeclMap.count(VD))
5084                 CGF.EmitVarDecl(*VD);
5085             }
5086           }
5087         }
5088       }
5089       LoopGlobals.Privatize();
5090       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
5091     }
5092   };
5093   OMPSimdLexicalScope Scope(*this, D);
5094   CGM.getOpenMPRuntime().emitInlinedDirective(
5095       *this,
5096       isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
5097                                                   : D.getDirectiveKind(),
5098       CodeGen);
5099 }
5100