xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp (revision e9a994639b2af232f994ba2ad23ca45a17718d2b)
1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/Support/AtomicOrdering.h"
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm::omp;
35 
// Forward declaration only; the definition is not part of this section of the
// file. It is used below by OMPSimdLexicalScope to obtain the declaration
// behind each use_device_addr clause operand.
36 static const VarDecl *getBaseDecl(const Expr *Ref);
37 
38 namespace {
39 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
40 /// for captured expressions.
41 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
42   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
43     for (const auto *C : S.clauses()) {
44       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
45         if (const auto *PreInit =
46                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
47           for (const auto *I : PreInit->decls()) {
48             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
49               CGF.EmitVarDecl(cast<VarDecl>(*I));
50             } else {
51               CodeGenFunction::AutoVarEmission Emission =
52                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
53               CGF.EmitAutoVarCleanups(Emission);
54             }
55           }
56         }
57       }
58     }
59   }
60   CodeGenFunction::OMPPrivateScope InlinedShareds;
61 
62   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
63     return CGF.LambdaCaptureFields.lookup(VD) ||
64            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
65            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
66             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
67   }
68 
69 public:
70   OMPLexicalScope(
71       CodeGenFunction &CGF, const OMPExecutableDirective &S,
72       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
73       const bool EmitPreInitStmt = true)
74       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
75         InlinedShareds(CGF) {
76     if (EmitPreInitStmt)
77       emitPreInitStmt(CGF, S);
78     if (!CapturedRegion.hasValue())
79       return;
80     assert(S.hasAssociatedStmt() &&
81            "Expected associated statement for inlined directive.");
82     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
83     for (const auto &C : CS->captures()) {
84       if (C.capturesVariable() || C.capturesVariableByCopy()) {
85         auto *VD = C.getCapturedVar();
86         assert(VD == VD->getCanonicalDecl() &&
87                "Canonical decl must be captured.");
88         DeclRefExpr DRE(
89             CGF.getContext(), const_cast<VarDecl *>(VD),
90             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
91                                        InlinedShareds.isGlobalVarCaptured(VD)),
92             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
93         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
94           return CGF.EmitLValue(&DRE).getAddress(CGF);
95         });
96       }
97     }
98     (void)InlinedShareds.Privatize();
99   }
100 };
101 
102 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPParallelScope final : public OMPLexicalScope {
105   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106     OpenMPDirectiveKind Kind = S.getDirectiveKind();
107     return !(isOpenMPTargetExecutionDirective(Kind) ||
108              isOpenMPLoopBoundSharingDirective(Kind)) &&
109            isOpenMPParallelDirective(Kind);
110   }
111 
112 public:
113   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
114       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
115                         EmitPreInitStmt(S)) {}
116 };
117 
118 /// Lexical scope for OpenMP teams construct, that handles correct codegen
119 /// for captured expressions.
120 class OMPTeamsScope final : public OMPLexicalScope {
121   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
122     OpenMPDirectiveKind Kind = S.getDirectiveKind();
123     return !isOpenMPTargetExecutionDirective(Kind) &&
124            isOpenMPTeamsDirective(Kind);
125   }
126 
127 public:
128   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
129       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
130                         EmitPreInitStmt(S)) {}
131 };
132 
133 /// Private scope for OpenMP loop-based directives, that supports capturing
134 /// of used expression from loop statement.
135 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
136   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
137     CodeGenFunction::OMPMapVars PreCondVars;
138     llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
139     for (const auto *E : S.counters()) {
140       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
141       EmittedAsPrivate.insert(VD->getCanonicalDecl());
142       (void)PreCondVars.setVarAddr(
143           CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
144     }
145     // Mark private vars as undefs.
146     for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
147       for (const Expr *IRef : C->varlists()) {
148         const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
149         if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
150           (void)PreCondVars.setVarAddr(
151               CGF, OrigVD,
152               Address(llvm::UndefValue::get(
153                           CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
154                               OrigVD->getType().getNonReferenceType()))),
155                       CGF.getContext().getDeclAlign(OrigVD)));
156         }
157       }
158     }
159     (void)PreCondVars.apply(CGF);
160     // Emit init, __range and __end variables for C++ range loops.
161     const Stmt *Body =
162         S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
163     for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
164       Body = OMPLoopDirective::tryToFindNextInnerLoop(
165           Body, /*TryImperfectlyNestedLoops=*/true);
166       if (auto *For = dyn_cast<ForStmt>(Body)) {
167         Body = For->getBody();
168       } else {
169         assert(isa<CXXForRangeStmt>(Body) &&
170                "Expected canonical for loop or range-based for loop.");
171         auto *CXXFor = cast<CXXForRangeStmt>(Body);
172         if (const Stmt *Init = CXXFor->getInit())
173           CGF.EmitStmt(Init);
174         CGF.EmitStmt(CXXFor->getRangeStmt());
175         CGF.EmitStmt(CXXFor->getEndStmt());
176         Body = CXXFor->getBody();
177       }
178     }
179     if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
180       for (const auto *I : PreInits->decls())
181         CGF.EmitVarDecl(cast<VarDecl>(*I));
182     }
183     PreCondVars.restore(CGF);
184   }
185 
186 public:
187   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
188       : CodeGenFunction::RunCleanupsScope(CGF) {
189     emitPreInitStmt(CGF, S);
190   }
191 };
192 
193 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
194   CodeGenFunction::OMPPrivateScope InlinedShareds;
195 
196   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
197     return CGF.LambdaCaptureFields.lookup(VD) ||
198            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
199            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
200             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
201   }
202 
203 public:
204   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
205       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
206         InlinedShareds(CGF) {
207     for (const auto *C : S.clauses()) {
208       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
209         if (const auto *PreInit =
210                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
211           for (const auto *I : PreInit->decls()) {
212             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
213               CGF.EmitVarDecl(cast<VarDecl>(*I));
214             } else {
215               CodeGenFunction::AutoVarEmission Emission =
216                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
217               CGF.EmitAutoVarCleanups(Emission);
218             }
219           }
220         }
221       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
222         for (const Expr *E : UDP->varlists()) {
223           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
224           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
225             CGF.EmitVarDecl(*OED);
226         }
227       } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
228         for (const Expr *E : UDP->varlists()) {
229           const Decl *D = getBaseDecl(E);
230           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
231             CGF.EmitVarDecl(*OED);
232         }
233       }
234     }
235     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
236       CGF.EmitOMPPrivateClause(S, InlinedShareds);
237     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
238       if (const Expr *E = TG->getReductionRef())
239         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
240     }
241     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
242     while (CS) {
243       for (auto &C : CS->captures()) {
244         if (C.capturesVariable() || C.capturesVariableByCopy()) {
245           auto *VD = C.getCapturedVar();
246           assert(VD == VD->getCanonicalDecl() &&
247                  "Canonical decl must be captured.");
248           DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
249                           isCapturedVar(CGF, VD) ||
250                               (CGF.CapturedStmtInfo &&
251                                InlinedShareds.isGlobalVarCaptured(VD)),
252                           VD->getType().getNonReferenceType(), VK_LValue,
253                           C.getLocation());
254           InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
255             return CGF.EmitLValue(&DRE).getAddress(CGF);
256           });
257         }
258       }
259       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
260     }
261     (void)InlinedShareds.Privatize();
262   }
263 };
264 
265 } // namespace
266 
// Forward declaration only; the definition is not part of this section of the
// file.
267 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
268                                          const OMPExecutableDirective &S,
269                                          const RegionCodeGenTy &CodeGen);
270 
271 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
272   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
273     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
274       OrigVD = OrigVD->getCanonicalDecl();
275       bool IsCaptured =
276           LambdaCaptureFields.lookup(OrigVD) ||
277           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
278           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
279       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
280                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
281       return EmitLValue(&DRE);
282     }
283   }
284   return EmitLValue(E);
285 }
286 
287 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
288   ASTContext &C = getContext();
289   llvm::Value *Size = nullptr;
290   auto SizeInChars = C.getTypeSizeInChars(Ty);
291   if (SizeInChars.isZero()) {
292     // getTypeSizeInChars() returns 0 for a VLA.
293     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
294       VlaSizePair VlaSize = getVLASize(VAT);
295       Ty = VlaSize.Type;
296       Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
297                   : VlaSize.NumElts;
298     }
299     SizeInChars = C.getTypeSizeInChars(Ty);
300     if (SizeInChars.isZero())
301       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
302     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
303   }
304   return CGM.getSize(SizeInChars);
305 }
306 
307 void CodeGenFunction::GenerateOpenMPCapturedVars(
308     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
309   const RecordDecl *RD = S.getCapturedRecordDecl();
310   auto CurField = RD->field_begin();
311   auto CurCap = S.captures().begin();
312   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
313                                                  E = S.capture_init_end();
314        I != E; ++I, ++CurField, ++CurCap) {
315     if (CurField->hasCapturedVLAType()) {
316       const VariableArrayType *VAT = CurField->getCapturedVLAType();
317       llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
318       CapturedVars.push_back(Val);
319     } else if (CurCap->capturesThis()) {
320       CapturedVars.push_back(CXXThisValue);
321     } else if (CurCap->capturesVariableByCopy()) {
322       llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
323 
324       // If the field is not a pointer, we need to save the actual value
325       // and load it as a void pointer.
326       if (!CurField->getType()->isAnyPointerType()) {
327         ASTContext &Ctx = getContext();
328         Address DstAddr = CreateMemTemp(
329             Ctx.getUIntPtrType(),
330             Twine(CurCap->getCapturedVar()->getName(), ".casted"));
331         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
332 
333         llvm::Value *SrcAddrVal = EmitScalarConversion(
334             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
335             Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
336         LValue SrcLV =
337             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
338 
339         // Store the value using the source type pointer.
340         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
341 
342         // Load the value using the destination type pointer.
343         CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
344       }
345       CapturedVars.push_back(CV);
346     } else {
347       assert(CurCap->capturesVariable() && "Expected capture by reference.");
348       CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
349     }
350   }
351 }
352 
353 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
354                                     QualType DstType, StringRef Name,
355                                     LValue AddrLV) {
356   ASTContext &Ctx = CGF.getContext();
357 
358   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
359       AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
360       Ctx.getPointerType(DstType), Loc);
361   Address TmpAddr =
362       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
363           .getAddress(CGF);
364   return TmpAddr;
365 }
366 
367 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
368   if (T->isLValueReferenceType())
369     return C.getLValueReferenceType(
370         getCanonicalParamType(C, T.getNonReferenceType()),
371         /*SpelledAsLValue=*/false);
372   if (T->isPointerType())
373     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
374   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
375     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
376       return getCanonicalParamType(C, VLA->getElementType());
377     if (!A->isVariablyModifiedType())
378       return C.getCanonicalType(T);
379   }
380   return C.getCanonicalParamType(T);
381 }
382 
383 namespace {
384 /// Contains required data for proper outlined function codegen.
385 struct FunctionOptions {
386   /// Captured statement for which the function is generated.
387   const CapturedStmt *S = nullptr;
388   /// true if cast to/from  UIntPtr is required for variables captured by
389   /// value.
390   const bool UIntPtrCastRequired = true;
391   /// true if only casted arguments must be registered as local args or VLA
392   /// sizes.
393   const bool RegisterCastedArgsOnly = false;
394   /// Name of the generated function.
395   const StringRef FunctionName;
396   /// Location of the non-debug version of the outlined function.
397   SourceLocation Loc;
398   explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
399                            bool RegisterCastedArgsOnly, StringRef FunctionName,
400                            SourceLocation Loc)
401       : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
402         RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
403         FunctionName(FunctionName), Loc(Loc) {}
404 };
405 } // namespace
406 
/// Creates the outlined function for the captured statement described by
/// \p FO, starts its emission in \p CGF and records how every captured field
/// is made available inside the new function.
///
/// \param CGF          Function emitter that will emit the outlined function.
/// \param Args         [out] Parameter list: the CapturedDecl parameters that
///                     precede the context parameter, one parameter per
///                     captured field, then the parameters that follow the
///                     context parameter.
/// \param LocalAddrs   [out] Maps a parameter to the captured variable it
///                     stands for and the address to use for it; for a
///                     captured 'this' the variable is null.
/// \param VLASizes     [out] Maps a parameter to the VLA size expression it
///                     carries and the loaded size value.
/// \param CXXThisValue [out] Reset to null; set to the loaded 'this' pointer
///                     if 'this' is captured.
/// \param FO           Options (captured statement, cast mode, name, location).
/// \returns the newly created llvm::Function.
407 static llvm::Function *emitOutlinedFunctionPrologue(
408     CodeGenFunction &CGF, FunctionArgList &Args,
409     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
410         &LocalAddrs,
411     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
412         &VLASizes,
413     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
414   const CapturedDecl *CD = FO.S->getCapturedDecl();
415   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
416   assert(CD->hasBody() && "missing CapturedDecl body");
417 
418   CXXThisValue = nullptr;
419   // Build the argument list.
420   CodeGenModule &CGM = CGF.CGM;
421   ASTContext &Ctx = CGM.getContext();
  // TargetArgs mirrors Args but may hold runtime-translated parameters; it is
  // the list the function is actually created and started with.
422   FunctionArgList TargetArgs;
  // Parameters located before the context parameter are taken over as is.
423   Args.append(CD->param_begin(),
424               std::next(CD->param_begin(), CD->getContextParamPosition()));
425   TargetArgs.append(
426       CD->param_begin(),
427       std::next(CD->param_begin(), CD->getContextParamPosition()));
428   auto I = FO.S->captures().begin();
  // Without uintptr casting, a synthetic function declaration is created to
  // serve as the owner of the ParmVarDecls built for captured variables below.
429   FunctionDecl *DebugFunctionDecl = nullptr;
430   if (!FO.UIntPtrCastRequired) {
431     FunctionProtoType::ExtProtoInfo EPI;
432     QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
433     DebugFunctionDecl = FunctionDecl::Create(
434         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
435         SourceLocation(), DeclarationName(), FunctionTy,
436         Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
437         /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
438   }
  // One parameter per field of the capture record; I walks the captures in
  // step with the fields.
439   for (const FieldDecl *FD : RD->fields()) {
440     QualType ArgType = FD->getType();
441     IdentifierInfo *II = nullptr;
442     VarDecl *CapVar = nullptr;
443 
444     // If this is a capture by copy and the type is not a pointer, the outlined
445     // function argument type should be uintptr and the value properly casted to
446     // uintptr. This is necessary given that the runtime library is only able to
447     // deal with pointers. We can pass in the same way the VLA type sizes to the
448     // outlined function.
449     if (FO.UIntPtrCastRequired &&
450         ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
451          I->capturesVariableArrayType()))
452       ArgType = Ctx.getUIntPtrType();
453 
    // Pick the parameter name: the captured variable's identifier, "this", or
    // "vla" for a captured VLA size.
454     if (I->capturesVariable() || I->capturesVariableByCopy()) {
455       CapVar = I->getCapturedVar();
456       II = CapVar->getIdentifier();
457     } else if (I->capturesThis()) {
458       II = &Ctx.Idents.get("this");
459     } else {
460       assert(I->capturesVariableArrayType());
461       II = &Ctx.Idents.get("vla");
462     }
463     if (ArgType->isVariablyModifiedType())
464       ArgType = getCanonicalParamType(Ctx, ArgType);
465     VarDecl *Arg;
466     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
467       Arg = ParmVarDecl::Create(
468           Ctx, DebugFunctionDecl,
469           CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
470           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
471           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
472     } else {
473       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
474                                       II, ArgType, ImplicitParamDecl::Other);
475     }
476     Args.emplace_back(Arg);
477     // Do not cast arguments if we emit function with non-original types.
478     TargetArgs.emplace_back(
479         FO.UIntPtrCastRequired
480             ? Arg
481             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
482     ++I;
483   }
  // Parameters located after the context parameter are taken over as is; the
  // context parameter itself is not added.
484   Args.append(
485       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
486       CD->param_end());
487   TargetArgs.append(
488       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
489       CD->param_end());
490 
491   // Create the function declaration.
492   const CGFunctionInfo &FuncInfo =
493       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
494   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
495 
496   auto *F =
497       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
498                              FO.FunctionName, &CGM.getModule());
499   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
500   if (CD->isNothrow())
501     F->setDoesNotThrow();
502   F->setDoesNotRecurse();
503 
504   // Generate the function.
505   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
506                     FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
507                     FO.UIntPtrCastRequired ? FO.Loc
508                                            : CD->getBody()->getBeginLoc());
  // Second pass over the fields: Cnt indexes the per-field parameters, which
  // start at the context parameter's position in Args/TargetArgs.
509   unsigned Cnt = CD->getContextParamPosition();
510   I = FO.S->captures().begin();
511   for (const FieldDecl *FD : RD->fields()) {
512     // Do not map arguments if we emit function with non-original types.
513     Address LocalAddr(Address::invalid());
514     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
515       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
516                                                              TargetArgs[Cnt]);
517     } else {
518       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
519     }
520     // If we are capturing a pointer by copy we don't need to do anything, just
521     // use the value that we get from the arguments.
522     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
523       const VarDecl *CurVD = I->getCapturedVar();
524       if (!FO.RegisterCastedArgsOnly)
525         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
526       ++Cnt;
527       ++I;
528       continue;
529     }
530 
531     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
532                                         AlignmentSource::Decl);
533     if (FD->hasCapturedVLAType()) {
      // VLA size: undo the uintptr cast if one was applied, load the size and
      // record it under its size expression.
534       if (FO.UIntPtrCastRequired) {
535         ArgLVal = CGF.MakeAddrLValue(
536             castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
537                                  Args[Cnt]->getName(), ArgLVal),
538             FD->getType(), AlignmentSource::Decl);
539       }
540       llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
541       const VariableArrayType *VAT = FD->getCapturedVLAType();
542       VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
543     } else if (I->capturesVariable()) {
      // Capture by reference: load through the reference or pointer parameter
      // to obtain the variable's address (skipped only for variably modified
      // pointer variables).
544       const VarDecl *Var = I->getCapturedVar();
545       QualType VarTy = Var->getType();
546       Address ArgAddr = ArgLVal.getAddress(CGF);
547       if (ArgLVal.getType()->isLValueReferenceType()) {
548         ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
549       } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
550         assert(ArgLVal.getType()->isPointerType());
551         ArgAddr = CGF.EmitLoadOfPointer(
552             ArgAddr, ArgLVal.getType()->castAs<PointerType>());
553       }
554       if (!FO.RegisterCastedArgsOnly) {
555         LocalAddrs.insert(
556             {Args[Cnt],
557              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
558       }
559     } else if (I->capturesVariableByCopy()) {
      // Non-pointer capture by copy: always registered, with the uintptr cast
      // undone when casting is in effect.
560       assert(!FD->getType()->isAnyPointerType() &&
561              "Not expecting a captured pointer.");
562       const VarDecl *Var = I->getCapturedVar();
563       LocalAddrs.insert({Args[Cnt],
564                          {Var, FO.UIntPtrCastRequired
565                                    ? castValueFromUintptr(
566                                          CGF, I->getLocation(), FD->getType(),
567                                          Args[Cnt]->getName(), ArgLVal)
568                                    : ArgLVal.getAddress(CGF)}});
569     } else {
570       // If 'this' is captured, load it into CXXThisValue.
571       assert(I->capturesThis());
572       CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
573       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
574     }
575     ++Cnt;
576     ++I;
577   }
578 
579   return F;
580 }
581 
/// Emits the outlined function for the captured statement \p S and returns it.
///
/// When debug info is being generated and hasReducedDebugInfo() holds, two
/// functions are emitted: the body goes into a function named
/// "<helper name>_debug__" that is emitted without the uintptr casts, and a
/// wrapper carrying the plain helper name (emitted with the casts) loads its
/// arguments and calls the body function. In that case the wrapper is
/// returned; otherwise the single outlined function is returned.
///
/// \param S   Captured statement to outline.
/// \param Loc Location used for the cast-enabled function and for the call
///            emitted in the wrapper.
582 llvm::Function *
583 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
584                                                     SourceLocation Loc) {
585   assert(
586       CapturedStmtInfo &&
587       "CapturedStmtInfo should be set when generating the captured function");
588   const CapturedDecl *CD = S.getCapturedDecl();
589   // Build the argument list.
590   bool NeedWrapperFunction =
591       getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
592   FunctionArgList Args;
593   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
594   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  // The function name is assembled in Buffer, which outlives FO below.
595   SmallString<256> Buffer;
596   llvm::raw_svector_ostream Out(Buffer);
597   Out << CapturedStmtInfo->getHelperName();
598   if (NeedWrapperFunction)
599     Out << "_debug__";
  // UIntPtr casts are requested only when no wrapper will be emitted.
600   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
601                      Out.str(), Loc);
602   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
603                                                    VLASizes, CXXThisValue, FO);
  // Make every captured variable resolve to the address computed by the
  // prologue; entries without a variable (captured 'this') are skipped.
604   CodeGenFunction::OMPPrivateScope LocalScope(*this);
605   for (const auto &LocalAddrPair : LocalAddrs) {
606     if (LocalAddrPair.second.first) {
607       LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
608         return LocalAddrPair.second.second;
609       });
610     }
611   }
612   (void)LocalScope.Privatize();
  // Register the VLA sizes received as arguments under their size expressions.
613   for (const auto &VLASizePair : VLASizes)
614     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
615   PGO.assignRegionCounters(GlobalDecl(CD), F);
616   CapturedStmtInfo->EmitBody(*this, CD->getBody());
617   (void)LocalScope.ForceCleanup();
618   FinishFunction(CD->getBodyRBrace());
619   if (!NeedWrapperFunction)
620     return F;
621 
  // Emit the wrapper: same captured statement, casts enabled, plain helper
  // name, emitted with a separate CodeGenFunction.
622   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
623                             /*RegisterCastedArgsOnly=*/true,
624                             CapturedStmtInfo->getHelperName(), Loc);
625   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
626   WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
627   Args.clear();
628   LocalAddrs.clear();
629   VLASizes.clear();
630   llvm::Function *WrapperF =
631       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
632                                    WrapperCGF.CXXThisValue, WrapperFO);
  // Build the call arguments for F: use the registered local address if there
  // is one, else the recorded VLA size, else the wrapper parameter itself.
633   llvm::SmallVector<llvm::Value *, 4> CallArgs;
634   for (const auto *Arg : Args) {
635     llvm::Value *CallArg;
636     auto I = LocalAddrs.find(Arg);
637     if (I != LocalAddrs.end()) {
638       LValue LV = WrapperCGF.MakeAddrLValue(
639           I->second.second,
640           I->second.first ? I->second.first->getType() : Arg->getType(),
641           AlignmentSource::Decl);
642       CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
643     } else {
644       auto EI = VLASizes.find(Arg);
645       if (EI != VLASizes.end()) {
646         CallArg = EI->second.second;
647       } else {
648         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
649                                               Arg->getType(),
650                                               AlignmentSource::Decl);
651         CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
652       }
653     }
654     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
655   }
656   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
657   WrapperCGF.FinishFunction();
658   return WrapperF;
659 }
660 
661 //===----------------------------------------------------------------------===//
662 //                              OpenMP Directive Emission
663 //===----------------------------------------------------------------------===//
/// Emits a loop that walks two arrays element by element and invokes
/// \p CopyGen for each pair of elements.
///
/// \param DestAddr     Address of the destination array; the element count is
///                     derived from it via emitArrayLength().
/// \param SrcAddr      Address of the source array; it is bitcast to the
///                     element type of \p DestAddr.
/// \param OriginalType Array type of the data being processed.
/// \param CopyGen      Callback emitting the per-element operation; it
///                     receives (destination element, source element).
664 void CodeGenFunction::EmitOMPAggregateAssign(
665     Address DestAddr, Address SrcAddr, QualType OriginalType,
666     const llvm::function_ref<void(Address, Address)> CopyGen) {
667   // Perform element-by-element initialization.
668   QualType ElementTy;
669 
670   // Drill down to the base element type on both arrays.
671   const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
672   llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
673   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
674 
675   llvm::Value *SrcBegin = SrcAddr.getPointer();
676   llvm::Value *DestBegin = DestAddr.getPointer();
677   // Cast from pointer to array type to pointer to single element.
678   llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
679   // The basic structure here is a while-do loop.
680   llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
681   llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop body entirely when the destination range is empty.
682   llvm::Value *IsEmpty =
683       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
684   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
685 
686   // Enter the loop body, making that address the current address.
687   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
688   EmitBlock(BodyBB);
689 
690   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
691 
  // PHI nodes carry the current source and destination element pointers; the
  // incoming values from the loop back-edge are added after the body.
692   llvm::PHINode *SrcElementPHI =
693     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
694   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695   Address SrcElementCurrent =
696       Address(SrcElementPHI,
697               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 
699   llvm::PHINode *DestElementPHI =
700     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701   DestElementPHI->addIncoming(DestBegin, EntryBB);
702   Address DestElementCurrent =
703     Address(DestElementPHI,
704             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705 
706   // Emit copy.
707   CopyGen(DestElementCurrent, SrcElementCurrent);
708 
709   // Shift the address forward by one element.
710   llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
711       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
712   llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
713       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
714   // Check whether we've reached the end.
715   llvm::Value *Done =
716       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
717   Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge comes from whatever block CopyGen left the builder in.
718   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
719   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
720 
721   // Done.
722   EmitBlock(DoneBB, /*IsFinished=*/true);
723 }
724 
725 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
726                                   Address SrcAddr, const VarDecl *DestVD,
727                                   const VarDecl *SrcVD, const Expr *Copy) {
728   if (OriginalType->isArrayType()) {
729     const auto *BO = dyn_cast<BinaryOperator>(Copy);
730     if (BO && BO->getOpcode() == BO_Assign) {
731       // Perform simple memcpy for simple copying.
732       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
733       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
734       EmitAggregateAssign(Dest, Src, OriginalType);
735     } else {
736       // For arrays with complex element types perform element by element
737       // copying.
738       EmitOMPAggregateAssign(
739           DestAddr, SrcAddr, OriginalType,
740           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
741             // Working with the single array element, so have to remap
742             // destination and source variables to corresponding array
743             // elements.
744             CodeGenFunction::OMPPrivateScope Remap(*this);
745             Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
746             Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
747             (void)Remap.Privatize();
748             EmitIgnoredExpr(Copy);
749           });
750     }
751   } else {
752     // Remap pseudo source variable to private copy.
753     CodeGenFunction::OMPPrivateScope Remap(*this);
754     Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
755     Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
756     (void)Remap.Privatize();
757     // Emit copying of the whole variable.
758     EmitIgnoredExpr(Copy);
759   }
760 }
761 
/// Emit the private copies for the variables named in the 'firstprivate'
/// clauses of \p D and register them in \p PrivateScope.
///
/// \returns true if at least one firstprivate variable for which a copy was
/// considered here also appears in a 'lastprivate' clause (and the set of
/// handled firstprivates is not empty); false otherwise, including when there
/// is no insertion point.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  // True when compiling for the device and D is a target execution directive.
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Map every lastprivate variable (canonical decl) to the modifier of the
  // clause that first mentions it.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // Note: this 'D' is a variable reference and shadows the directive
    // parameter inside this loop only.
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  // Canonical decls already handled, so that a variable listed several times
  // is processed only once.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef and InitsRef walk the clause's variable list and init helpers in
    // lock-step with private_copies(); every exit from the loop body below
    // must advance both.
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      // Field of the captured record for OrigVD, or null if it is not
      // captured.
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // No copy is emitted when the variable has a captured field of
      // non-reference type, unless a copy is forced, the variable is also
      // lastprivate, or the private copy carries an allocate attribute.
      // NOTE(review): presumably the by-value capture already serves as the
      // private copy - confirm against the capture logic.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // Helper variable that the private copy's initializer refers to.
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Synthesize a reference to the original variable to obtain its
        // lvalue.
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  // Non-trivial constructor: initialize the private array
                  // element by element from the original array.
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        // Drop the temporary mapping of the helper variable.
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  // Copy the freshly initialized private value into the
                  // runtime-provided storage and make VD refer to that
                  // storage from now on.
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
919 
920 void CodeGenFunction::EmitOMPPrivateClause(
921     const OMPExecutableDirective &D,
922     CodeGenFunction::OMPPrivateScope &PrivateScope) {
923   if (!HaveInsertPoint())
924     return;
925   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
926   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
927     auto IRef = C->varlist_begin();
928     for (const Expr *IInit : C->private_copies()) {
929       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
930       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
931         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
932         bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
933           // Emit private VarDecl with copy init.
934           EmitDecl(*VD);
935           return GetAddrOfLocalVar(VD);
936         });
937         assert(IsRegistered && "private var already registered as private");
938         // Silence the warning about unused variable.
939         (void)IsRegistered;
940       }
941       ++IRef;
942     }
943   }
944 }
945 
946 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
947   if (!HaveInsertPoint())
948     return false;
949   // threadprivate_var1 = master_threadprivate_var1;
950   // operator=(threadprivate_var2, master_threadprivate_var2);
951   // ...
952   // __kmpc_barrier(&loc, global_tid);
953   llvm::DenseSet<const VarDecl *> CopiedVars;
954   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
955   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
956     auto IRef = C->varlist_begin();
957     auto ISrcRef = C->source_exprs().begin();
958     auto IDestRef = C->destination_exprs().begin();
959     for (const Expr *AssignOp : C->assignment_ops()) {
960       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
961       QualType Type = VD->getType();
962       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
963         // Get the address of the master variable. If we are emitting code with
964         // TLS support, the address is passed from the master as field in the
965         // captured declaration.
966         Address MasterAddr = Address::invalid();
967         if (getLangOpts().OpenMPUseTLS &&
968             getContext().getTargetInfo().isTLSSupported()) {
969           assert(CapturedStmtInfo->lookup(VD) &&
970                  "Copyin threadprivates should have been captured!");
971           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
972                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
973           MasterAddr = EmitLValue(&DRE).getAddress(*this);
974           LocalDeclMap.erase(VD);
975         } else {
976           MasterAddr =
977             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
978                                         : CGM.GetAddrOfGlobal(VD),
979                     getContext().getDeclAlign(VD));
980         }
981         // Get the address of the threadprivate variable.
982         Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
983         if (CopiedVars.size() == 1) {
984           // At first check if current thread is a master thread. If it is, no
985           // need to copy data.
986           CopyBegin = createBasicBlock("copyin.not.master");
987           CopyEnd = createBasicBlock("copyin.not.master.end");
988           Builder.CreateCondBr(
989               Builder.CreateICmpNE(
990                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
991                   Builder.CreatePtrToInt(PrivateAddr.getPointer(),
992                                          CGM.IntPtrTy)),
993               CopyBegin, CopyEnd);
994           EmitBlock(CopyBegin);
995         }
996         const auto *SrcVD =
997             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
998         const auto *DestVD =
999             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1000         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1001       }
1002       ++IRef;
1003       ++ISrcRef;
1004       ++IDestRef;
1005     }
1006   }
1007   if (CopyEnd) {
1008     // Exit out of copying procedure for non-master thread.
1009     EmitBlock(CopyEnd, /*IsFinished=*/true);
1010     return true;
1011   }
1012   return false;
1013 }
1014 
1015 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1016     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1017   if (!HaveInsertPoint())
1018     return false;
1019   bool HasAtLeastOneLastprivate = false;
1020   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1021   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1022     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1023     for (const Expr *C : LoopDirective->counters()) {
1024       SIMDLCVs.insert(
1025           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1026     }
1027   }
1028   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1029   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1030     HasAtLeastOneLastprivate = true;
1031     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1032         !getLangOpts().OpenMPSimd)
1033       break;
1034     const auto *IRef = C->varlist_begin();
1035     const auto *IDestRef = C->destination_exprs().begin();
1036     for (const Expr *IInit : C->private_copies()) {
1037       // Keep the address of the original variable for future update at the end
1038       // of the loop.
1039       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1040       // Taskloops do not require additional initialization, it is done in
1041       // runtime support library.
1042       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1043         const auto *DestVD =
1044             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1045         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
1046           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1047                           /*RefersToEnclosingVariableOrCapture=*/
1048                               CapturedStmtInfo->lookup(OrigVD) != nullptr,
1049                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1050           return EmitLValue(&DRE).getAddress(*this);
1051         });
1052         // Check if the variable is also a firstprivate: in this case IInit is
1053         // not generated. Initialization of this variable will happen in codegen
1054         // for 'firstprivate' clause.
1055         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1056           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1057           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
1058                                                                OrigVD]() {
1059             if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1060               Address VDAddr =
1061                   CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
1062                                                                         OrigVD);
1063               setAddrOfLocalVar(VD, VDAddr);
1064               return VDAddr;
1065             }
1066             // Emit private VarDecl with copy init.
1067             EmitDecl(*VD);
1068             return GetAddrOfLocalVar(VD);
1069           });
1070           assert(IsRegistered &&
1071                  "lastprivate var already registered as private");
1072           (void)IsRegistered;
1073         }
1074       }
1075       ++IRef;
1076       ++IDestRef;
1077     }
1078   }
1079   return HasAtLeastOneLastprivate;
1080 }
1081 
1082 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1083     const OMPExecutableDirective &D, bool NoFinals,
1084     llvm::Value *IsLastIterCond) {
1085   if (!HaveInsertPoint())
1086     return;
1087   // Emit following code:
1088   // if (<IsLastIterCond>) {
1089   //   orig_var1 = private_orig_var1;
1090   //   ...
1091   //   orig_varn = private_orig_varn;
1092   // }
1093   llvm::BasicBlock *ThenBB = nullptr;
1094   llvm::BasicBlock *DoneBB = nullptr;
1095   if (IsLastIterCond) {
1096     // Emit implicit barrier if at least one lastprivate conditional is found
1097     // and this is not a simd mode.
1098     if (!getLangOpts().OpenMPSimd &&
1099         llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1100                      [](const OMPLastprivateClause *C) {
1101                        return C->getKind() == OMPC_LASTPRIVATE_conditional;
1102                      })) {
1103       CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1104                                              OMPD_unknown,
1105                                              /*EmitChecks=*/false,
1106                                              /*ForceSimpleCall=*/true);
1107     }
1108     ThenBB = createBasicBlock(".omp.lastprivate.then");
1109     DoneBB = createBasicBlock(".omp.lastprivate.done");
1110     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1111     EmitBlock(ThenBB);
1112   }
1113   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1114   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1115   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1116     auto IC = LoopDirective->counters().begin();
1117     for (const Expr *F : LoopDirective->finals()) {
1118       const auto *D =
1119           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1120       if (NoFinals)
1121         AlreadyEmittedVars.insert(D);
1122       else
1123         LoopCountersAndUpdates[D] = F;
1124       ++IC;
1125     }
1126   }
1127   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1128     auto IRef = C->varlist_begin();
1129     auto ISrcRef = C->source_exprs().begin();
1130     auto IDestRef = C->destination_exprs().begin();
1131     for (const Expr *AssignOp : C->assignment_ops()) {
1132       const auto *PrivateVD =
1133           cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1134       QualType Type = PrivateVD->getType();
1135       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1136       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1137         // If lastprivate variable is a loop control variable for loop-based
1138         // directive, update its value before copyin back to original
1139         // variable.
1140         if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1141           EmitIgnoredExpr(FinalExpr);
1142         const auto *SrcVD =
1143             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1144         const auto *DestVD =
1145             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1146         // Get the address of the private variable.
1147         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1148         if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1149           PrivateAddr =
1150               Address(Builder.CreateLoad(PrivateAddr),
1151                       CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1152         // Store the last value to the private copy in the last iteration.
1153         if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1154           CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1155               *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1156               (*IRef)->getExprLoc());
1157         // Get the address of the original variable.
1158         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1159         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1160       }
1161       ++IRef;
1162       ++ISrcRef;
1163       ++IDestRef;
1164     }
1165     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1166       EmitIgnoredExpr(PostUpdate);
1167   }
1168   if (IsLastIterCond)
1169     EmitBlock(DoneBB, /*IsFinished=*/true);
1170 }
1171 
1172 void CodeGenFunction::EmitOMPReductionClauseInit(
1173     const OMPExecutableDirective &D,
1174     CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1175   if (!HaveInsertPoint())
1176     return;
1177   SmallVector<const Expr *, 4> Shareds;
1178   SmallVector<const Expr *, 4> Privates;
1179   SmallVector<const Expr *, 4> ReductionOps;
1180   SmallVector<const Expr *, 4> LHSs;
1181   SmallVector<const Expr *, 4> RHSs;
1182   OMPTaskDataTy Data;
1183   SmallVector<const Expr *, 4> TaskLHSs;
1184   SmallVector<const Expr *, 4> TaskRHSs;
1185   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1186     if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1187       continue;
1188     Shareds.append(C->varlist_begin(), C->varlist_end());
1189     Privates.append(C->privates().begin(), C->privates().end());
1190     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1191     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1192     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1193     if (C->getModifier() == OMPC_REDUCTION_task) {
1194       Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1195       Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1196       Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1197       Data.ReductionOps.append(C->reduction_ops().begin(),
1198                                C->reduction_ops().end());
1199       TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1200       TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1201     }
1202   }
1203   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1204   unsigned Count = 0;
1205   auto *ILHS = LHSs.begin();
1206   auto *IRHS = RHSs.begin();
1207   auto *IPriv = Privates.begin();
1208   for (const Expr *IRef : Shareds) {
1209     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1210     // Emit private VarDecl with reduction init.
1211     RedCG.emitSharedOrigLValue(*this, Count);
1212     RedCG.emitAggregateType(*this, Count);
1213     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1214     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1215                              RedCG.getSharedLValue(Count),
1216                              [&Emission](CodeGenFunction &CGF) {
1217                                CGF.EmitAutoVarInit(Emission);
1218                                return true;
1219                              });
1220     EmitAutoVarCleanups(Emission);
1221     Address BaseAddr = RedCG.adjustPrivateAddress(
1222         *this, Count, Emission.getAllocatedAddress());
1223     bool IsRegistered = PrivateScope.addPrivate(
1224         RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1225     assert(IsRegistered && "private var already registered as private");
1226     // Silence the warning about unused variable.
1227     (void)IsRegistered;
1228 
1229     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1230     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1231     QualType Type = PrivateVD->getType();
1232     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1233     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1234       // Store the address of the original variable associated with the LHS
1235       // implicit variable.
1236       PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1237         return RedCG.getSharedLValue(Count).getAddress(*this);
1238       });
1239       PrivateScope.addPrivate(
1240           RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1241     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1242                isa<ArraySubscriptExpr>(IRef)) {
1243       // Store the address of the original variable associated with the LHS
1244       // implicit variable.
1245       PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1246         return RedCG.getSharedLValue(Count).getAddress(*this);
1247       });
1248       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1249         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1250                                             ConvertTypeForMem(RHSVD->getType()),
1251                                             "rhs.begin");
1252       });
1253     } else {
1254       QualType Type = PrivateVD->getType();
1255       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1256       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1257       // Store the address of the original variable associated with the LHS
1258       // implicit variable.
1259       if (IsArray) {
1260         OriginalAddr = Builder.CreateElementBitCast(
1261             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1262       }
1263       PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1264       PrivateScope.addPrivate(
1265           RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1266             return IsArray
1267                        ? Builder.CreateElementBitCast(
1268                              GetAddrOfLocalVar(PrivateVD),
1269                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1270                        : GetAddrOfLocalVar(PrivateVD);
1271           });
1272     }
1273     ++ILHS;
1274     ++IRHS;
1275     ++IPriv;
1276     ++Count;
1277   }
1278   if (!Data.ReductionVars.empty()) {
1279     Data.IsReductionWithTaskMod = true;
1280     Data.IsWorksharingReduction =
1281         isOpenMPWorksharingDirective(D.getDirectiveKind());
1282     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1283         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1284     const Expr *TaskRedRef = nullptr;
1285     switch (D.getDirectiveKind()) {
1286     case OMPD_parallel:
1287       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1288       break;
1289     case OMPD_for:
1290       TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1291       break;
1292     case OMPD_sections:
1293       TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1294       break;
1295     case OMPD_parallel_for:
1296       TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1297       break;
1298     case OMPD_parallel_master:
1299       TaskRedRef =
1300           cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1301       break;
1302     case OMPD_parallel_sections:
1303       TaskRedRef =
1304           cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1305       break;
1306     case OMPD_target_parallel:
1307       TaskRedRef =
1308           cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1309       break;
1310     case OMPD_target_parallel_for:
1311       TaskRedRef =
1312           cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1313       break;
1314     case OMPD_distribute_parallel_for:
1315       TaskRedRef =
1316           cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1317       break;
1318     case OMPD_teams_distribute_parallel_for:
1319       TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1320                        .getTaskReductionRefExpr();
1321       break;
1322     case OMPD_target_teams_distribute_parallel_for:
1323       TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1324                        .getTaskReductionRefExpr();
1325       break;
1326     case OMPD_simd:
1327     case OMPD_for_simd:
1328     case OMPD_section:
1329     case OMPD_single:
1330     case OMPD_master:
1331     case OMPD_critical:
1332     case OMPD_parallel_for_simd:
1333     case OMPD_task:
1334     case OMPD_taskyield:
1335     case OMPD_barrier:
1336     case OMPD_taskwait:
1337     case OMPD_taskgroup:
1338     case OMPD_flush:
1339     case OMPD_depobj:
1340     case OMPD_scan:
1341     case OMPD_ordered:
1342     case OMPD_atomic:
1343     case OMPD_teams:
1344     case OMPD_target:
1345     case OMPD_cancellation_point:
1346     case OMPD_cancel:
1347     case OMPD_target_data:
1348     case OMPD_target_enter_data:
1349     case OMPD_target_exit_data:
1350     case OMPD_taskloop:
1351     case OMPD_taskloop_simd:
1352     case OMPD_master_taskloop:
1353     case OMPD_master_taskloop_simd:
1354     case OMPD_parallel_master_taskloop:
1355     case OMPD_parallel_master_taskloop_simd:
1356     case OMPD_distribute:
1357     case OMPD_target_update:
1358     case OMPD_distribute_parallel_for_simd:
1359     case OMPD_distribute_simd:
1360     case OMPD_target_parallel_for_simd:
1361     case OMPD_target_simd:
1362     case OMPD_teams_distribute:
1363     case OMPD_teams_distribute_simd:
1364     case OMPD_teams_distribute_parallel_for_simd:
1365     case OMPD_target_teams:
1366     case OMPD_target_teams_distribute:
1367     case OMPD_target_teams_distribute_parallel_for_simd:
1368     case OMPD_target_teams_distribute_simd:
1369     case OMPD_declare_target:
1370     case OMPD_end_declare_target:
1371     case OMPD_threadprivate:
1372     case OMPD_allocate:
1373     case OMPD_declare_reduction:
1374     case OMPD_declare_mapper:
1375     case OMPD_declare_simd:
1376     case OMPD_requires:
1377     case OMPD_declare_variant:
1378     case OMPD_begin_declare_variant:
1379     case OMPD_end_declare_variant:
1380     case OMPD_unknown:
1381     default:
1382       llvm_unreachable("Enexpected directive with task reductions.");
1383     }
1384 
1385     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1386     EmitVarDecl(*VD);
1387     EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1388                       /*Volatile=*/false, TaskRedRef->getType());
1389   }
1390 }
1391 
1392 void CodeGenFunction::EmitOMPReductionClauseFinal(
1393     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1394   if (!HaveInsertPoint())
1395     return;
1396   llvm::SmallVector<const Expr *, 8> Privates;
1397   llvm::SmallVector<const Expr *, 8> LHSExprs;
1398   llvm::SmallVector<const Expr *, 8> RHSExprs;
1399   llvm::SmallVector<const Expr *, 8> ReductionOps;
1400   bool HasAtLeastOneReduction = false;
1401   bool IsReductionWithTaskMod = false;
1402   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1403     // Do not emit for inscan reductions.
1404     if (C->getModifier() == OMPC_REDUCTION_inscan)
1405       continue;
1406     HasAtLeastOneReduction = true;
1407     Privates.append(C->privates().begin(), C->privates().end());
1408     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1409     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1410     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1411     IsReductionWithTaskMod =
1412         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1413   }
1414   if (HasAtLeastOneReduction) {
1415     if (IsReductionWithTaskMod) {
1416       CGM.getOpenMPRuntime().emitTaskReductionFini(
1417           *this, D.getBeginLoc(),
1418           isOpenMPWorksharingDirective(D.getDirectiveKind()));
1419     }
1420     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1421                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1422                       ReductionKind == OMPD_simd;
1423     bool SimpleReduction = ReductionKind == OMPD_simd;
1424     // Emit nowait reduction if nowait clause is present or directive is a
1425     // parallel directive (it always has implicit barrier).
1426     CGM.getOpenMPRuntime().emitReduction(
1427         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1428         {WithNowait, SimpleReduction, ReductionKind});
1429   }
1430 }
1431 
1432 static void emitPostUpdateForReductionClause(
1433     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1434     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1435   if (!CGF.HaveInsertPoint())
1436     return;
1437   llvm::BasicBlock *DoneBB = nullptr;
1438   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1439     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1440       if (!DoneBB) {
1441         if (llvm::Value *Cond = CondGen(CGF)) {
1442           // If the first post-update expression is found, emit conditional
1443           // block if it was requested.
1444           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1445           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1446           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1447           CGF.EmitBlock(ThenBB);
1448         }
1449       }
1450       CGF.EmitIgnoredExpr(PostUpdate);
1451     }
1452   }
1453   if (DoneBB)
1454     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1455 }
1456 
1457 namespace {
1458 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1459 /// parallel function. This is necessary for combined constructs such as
1460 /// 'distribute parallel for'
1461 typedef llvm::function_ref<void(CodeGenFunction &,
1462                                 const OMPExecutableDirective &,
1463                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1464     CodeGenBoundParametersTy;
1465 } // anonymous namespace
1466 
1467 static void
1468 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1469                                      const OMPExecutableDirective &S) {
1470   if (CGF.getLangOpts().OpenMP < 50)
1471     return;
1472   llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1473   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1474     for (const Expr *Ref : C->varlists()) {
1475       if (!Ref->getType()->isScalarType())
1476         continue;
1477       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1478       if (!DRE)
1479         continue;
1480       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1481       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1482     }
1483   }
1484   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1485     for (const Expr *Ref : C->varlists()) {
1486       if (!Ref->getType()->isScalarType())
1487         continue;
1488       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1489       if (!DRE)
1490         continue;
1491       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1492       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1493     }
1494   }
1495   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1496     for (const Expr *Ref : C->varlists()) {
1497       if (!Ref->getType()->isScalarType())
1498         continue;
1499       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1500       if (!DRE)
1501         continue;
1502       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1503       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1504     }
1505   }
1506   // Privates should ne analyzed since they are not captured at all.
1507   // Task reductions may be skipped - tasks are ignored.
1508   // Firstprivates do not return value but may be passed by reference - no need
1509   // to check for updated lastprivate conditional.
1510   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1511     for (const Expr *Ref : C->varlists()) {
1512       if (!Ref->getType()->isScalarType())
1513         continue;
1514       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1515       if (!DRE)
1516         continue;
1517       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1518     }
1519   }
1520   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1521       CGF, S, PrivateDecls);
1522 }
1523 
1524 static void emitCommonOMPParallelDirective(
1525     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1526     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1527     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1528   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1529   llvm::Function *OutlinedFn =
1530       CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1531           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1532   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1533     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1534     llvm::Value *NumThreads =
1535         CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1536                            /*IgnoreResultAssign=*/true);
1537     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1538         CGF, NumThreads, NumThreadsClause->getBeginLoc());
1539   }
1540   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1541     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1542     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1543         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1544   }
1545   const Expr *IfCond = nullptr;
1546   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1547     if (C->getNameModifier() == OMPD_unknown ||
1548         C->getNameModifier() == OMPD_parallel) {
1549       IfCond = C->getCondition();
1550       break;
1551     }
1552   }
1553 
1554   OMPParallelScope Scope(CGF, S);
1555   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1556   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1557   // lower and upper bounds with the pragma 'for' chunking mechanism.
1558   // The following lambda takes care of appending the lower and upper bound
1559   // parameters when necessary
1560   CodeGenBoundParameters(CGF, S, CapturedVars);
1561   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1562   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1563                                               CapturedVars, IfCond);
1564 }
1565 
1566 static bool isAllocatableDecl(const VarDecl *VD) {
1567   const VarDecl *CVD = VD->getCanonicalDecl();
1568   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1569     return false;
1570   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1571   // Use the default allocation.
1572   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1573             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1574            !AA->getAllocator());
1575 }
1576 
1577 static void emitEmptyBoundParameters(CodeGenFunction &,
1578                                      const OMPExecutableDirective &,
1579                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1580 
1581 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1582     CodeGenFunction &CGF, const VarDecl *VD) {
1583   CodeGenModule &CGM = CGF.CGM;
1584   auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1585 
1586   if (!VD)
1587     return Address::invalid();
1588   const VarDecl *CVD = VD->getCanonicalDecl();
1589   if (!isAllocatableDecl(CVD))
1590     return Address::invalid();
1591   llvm::Value *Size;
1592   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1593   if (CVD->getType()->isVariablyModifiedType()) {
1594     Size = CGF.getTypeSize(CVD->getType());
1595     // Align the size: ((size + align - 1) / align) * align
1596     Size = CGF.Builder.CreateNUWAdd(
1597         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1598     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1599     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1600   } else {
1601     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1602     Size = CGM.getSize(Sz.alignTo(Align));
1603   }
1604 
1605   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1606   assert(AA->getAllocator() &&
1607          "Expected allocator expression for non-default allocator.");
1608   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1609   // According to the standard, the original allocator type is a enum (integer).
1610   // Convert to pointer type, if required.
1611   if (Allocator->getType()->isIntegerTy())
1612     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1613   else if (Allocator->getType()->isPointerTy())
1614     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1615                                                                 CGM.VoidPtrTy);
1616 
1617   llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1618       CGF.Builder, Size, Allocator,
1619       getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1620   llvm::CallInst *FreeCI =
1621       OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1622 
1623   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1624   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1625       Addr,
1626       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1627       getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1628   return Address(Addr, Align);
1629 }
1630 
1631 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1632     CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1633     SourceLocation Loc) {
1634   CodeGenModule &CGM = CGF.CGM;
1635   if (CGM.getLangOpts().OpenMPUseTLS &&
1636       CGM.getContext().getTargetInfo().isTLSSupported())
1637     return VDAddr;
1638 
1639   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1640 
1641   llvm::Type *VarTy = VDAddr.getElementType();
1642   llvm::Value *Data =
1643       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1644   llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1645   std::string Suffix = getNameWithSeparators({"cache", ""});
1646   llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1647 
1648   llvm::CallInst *ThreadPrivateCacheCall =
1649       OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1650 
1651   return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1652 }
1653 
1654 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1655     ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1656   SmallString<128> Buffer;
1657   llvm::raw_svector_ostream OS(Buffer);
1658   StringRef Sep = FirstSeparator;
1659   for (StringRef Part : Parts) {
1660     OS << Sep << Part;
1661     Sep = Separator;
1662   }
1663   return OS.str().str();
1664 }
1665 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1666   if (CGM.getLangOpts().OpenMPIRBuilder) {
1667     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1668     // Check if we have any if clause associated with the directive.
1669     llvm::Value *IfCond = nullptr;
1670     if (const auto *C = S.getSingleClause<OMPIfClause>())
1671       IfCond = EmitScalarExpr(C->getCondition(),
1672                               /*IgnoreResultAssign=*/true);
1673 
1674     llvm::Value *NumThreads = nullptr;
1675     if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1676       NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1677                                   /*IgnoreResultAssign=*/true);
1678 
1679     ProcBindKind ProcBind = OMP_PROC_BIND_default;
1680     if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1681       ProcBind = ProcBindClause->getProcBindKind();
1682 
1683     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1684 
1685     // The cleanup callback that finalizes all variabels at the given location,
1686     // thus calls destructors etc.
1687     auto FiniCB = [this](InsertPointTy IP) {
1688       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1689     };
1690 
1691     // Privatization callback that performs appropriate action for
1692     // shared/private/firstprivate/lastprivate/copyin/... variables.
1693     //
1694     // TODO: This defaults to shared right now.
1695     auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1696                      llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1697       // The next line is appropriate only for variables (Val) with the
1698       // data-sharing attribute "shared".
1699       ReplVal = &Val;
1700 
1701       return CodeGenIP;
1702     };
1703 
1704     const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1705     const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1706 
1707     auto BodyGenCB = [ParallelRegionBodyStmt,
1708                       this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1709                             llvm::BasicBlock &ContinuationBB) {
1710       OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1711                                                       ContinuationBB);
1712       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1713                                              CodeGenIP, ContinuationBB);
1714     };
1715 
1716     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1717     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1718     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1719         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1720     Builder.restoreIP(
1721         OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1722                                   IfCond, NumThreads, ProcBind, S.hasCancel()));
1723     return;
1724   }
1725 
1726   // Emit parallel region as a standalone region.
1727   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1728     Action.Enter(CGF);
1729     OMPPrivateScope PrivateScope(CGF);
1730     bool Copyins = CGF.EmitOMPCopyinClause(S);
1731     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1732     if (Copyins) {
1733       // Emit implicit barrier to synchronize threads and avoid data races on
1734       // propagation master's thread values of threadprivate variables to local
1735       // instances of that variables of all other implicit threads.
1736       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1737           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1738           /*ForceSimpleCall=*/true);
1739     }
1740     CGF.EmitOMPPrivateClause(S, PrivateScope);
1741     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1742     (void)PrivateScope.Privatize();
1743     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1744     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1745   };
1746   {
1747     auto LPCRegion =
1748         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1749     emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1750                                    emitEmptyBoundParameters);
1751     emitPostUpdateForReductionClause(*this, S,
1752                                      [](CodeGenFunction &) { return nullptr; });
1753   }
1754   // Check for outer lastprivate conditional update.
1755   checkForLastprivateConditionalUpdate(*this, S);
1756 }
1757 
1758 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1759                      int MaxLevel, int Level = 0) {
1760   assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1761   const Stmt *SimplifiedS = S->IgnoreContainers();
1762   if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1763     PrettyStackTraceLoc CrashInfo(
1764         CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1765         "LLVM IR generation of compound statement ('{}')");
1766 
1767     // Keep track of the current cleanup stack depth, including debug scopes.
1768     CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1769     for (const Stmt *CurStmt : CS->body())
1770       emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1771     return;
1772   }
1773   if (SimplifiedS == NextLoop) {
1774     if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1775       S = For->getBody();
1776     } else {
1777       assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1778              "Expected canonical for loop or range-based for loop.");
1779       const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1780       CGF.EmitStmt(CXXFor->getLoopVarStmt());
1781       S = CXXFor->getBody();
1782     }
1783     if (Level + 1 < MaxLevel) {
1784       NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1785           S, /*TryImperfectlyNestedLoops=*/true);
1786       emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1787       return;
1788     }
1789   }
1790   CGF.EmitStmt(S);
1791 }
1792 
1793 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1794                                       JumpDest LoopExit) {
1795   RunCleanupsScope BodyScope(*this);
1796   // Update counters values on current iteration.
1797   for (const Expr *UE : D.updates())
1798     EmitIgnoredExpr(UE);
1799   // Update the linear variables.
1800   // In distribute directives only loop counters may be marked as linear, no
1801   // need to generate the code for them.
1802   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1803     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1804       for (const Expr *UE : C->updates())
1805         EmitIgnoredExpr(UE);
1806     }
1807   }
1808 
1809   // On a continue in the body, jump to the end.
1810   JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1811   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1812   for (const Expr *E : D.finals_conditions()) {
1813     if (!E)
1814       continue;
1815     // Check that loop counter in non-rectangular nest fits into the iteration
1816     // space.
1817     llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1818     EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1819                          getProfileCount(D.getBody()));
1820     EmitBlock(NextBB);
1821   }
1822 
1823   OMPPrivateScope InscanScope(*this);
1824   EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1825   bool IsInscanRegion = InscanScope.Privatize();
1826   if (IsInscanRegion) {
1827     // Need to remember the block before and after scan directive
1828     // to dispatch them correctly depending on the clause used in
1829     // this directive, inclusive or exclusive. For inclusive scan the natural
1830     // order of the blocks is used, for exclusive clause the blocks must be
1831     // executed in reverse order.
1832     OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1833     OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1834     // No need to allocate inscan exit block, in simd mode it is selected in the
1835     // codegen for the scan directive.
1836     if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1837       OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1838     OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1839     EmitBranch(OMPScanDispatch);
1840     EmitBlock(OMPBeforeScanBlock);
1841   }
1842 
1843   // Emit loop variables for C++ range loops.
1844   const Stmt *Body =
1845       D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1846   // Emit loop body.
1847   emitBody(*this, Body,
1848            OMPLoopDirective::tryToFindNextInnerLoop(
1849                Body, /*TryImperfectlyNestedLoops=*/true),
1850            D.getCollapsedNumber());
1851 
1852   // Jump to the dispatcher at the end of the loop body.
1853   if (IsInscanRegion)
1854     EmitBranch(OMPScanExitBlock);
1855 
1856   // The end (updates/cleanups).
1857   EmitBlock(Continue.getBlock());
1858   BreakContinueStack.pop_back();
1859 }
1860 
1861 void CodeGenFunction::EmitOMPInnerLoop(
1862     const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
1863     const Expr *IncExpr,
1864     const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1865     const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1866   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1867 
1868   // Start the loop with a block that tests the condition.
1869   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1870   EmitBlock(CondBlock);
1871   const SourceRange R = S.getSourceRange();
1872 
1873   // If attributes are attached, push to the basic block with them.
1874   const auto &OMPED = cast<OMPExecutableDirective>(S);
1875   const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
1876   const Stmt *SS = ICS->getCapturedStmt();
1877   const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
1878   if (AS)
1879     LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
1880                    AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
1881                    SourceLocToDebugLoc(R.getEnd()));
1882   else
1883     LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1884                    SourceLocToDebugLoc(R.getEnd()));
1885 
1886   // If there are any cleanups between here and the loop-exit scope,
1887   // create a block to stage a loop exit along.
1888   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1889   if (RequiresCleanup)
1890     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1891 
1892   llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
1893 
1894   // Emit condition.
1895   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1896   if (ExitBlock != LoopExit.getBlock()) {
1897     EmitBlock(ExitBlock);
1898     EmitBranchThroughCleanup(LoopExit);
1899   }
1900 
1901   EmitBlock(LoopBody);
1902   incrementProfileCounter(&S);
1903 
1904   // Create a block for the increment.
1905   JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1906   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1907 
1908   BodyGen(*this);
1909 
1910   // Emit "IV = IV + 1" and a back-edge to the condition block.
1911   EmitBlock(Continue.getBlock());
1912   EmitIgnoredExpr(IncExpr);
1913   PostIncGen(*this);
1914   BreakContinueStack.pop_back();
1915   EmitBranch(CondBlock);
1916   LoopStack.pop();
1917   // Emit the fall-through block.
1918   EmitBlock(LoopExit.getBlock());
1919 }
1920 
1921 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1922   if (!HaveInsertPoint())
1923     return false;
1924   // Emit inits for the linear variables.
1925   bool HasLinears = false;
1926   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1927     for (const Expr *Init : C->inits()) {
1928       HasLinears = true;
1929       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1930       if (const auto *Ref =
1931               dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1932         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1933         const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1934         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1935                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1936                         VD->getInit()->getType(), VK_LValue,
1937                         VD->getInit()->getExprLoc());
1938         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1939                                                 VD->getType()),
1940                        /*capturedByInit=*/false);
1941         EmitAutoVarCleanups(Emission);
1942       } else {
1943         EmitVarDecl(*VD);
1944       }
1945     }
1946     // Emit the linear steps for the linear clauses.
1947     // If a step is not constant, it is pre-calculated before the loop.
1948     if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1949       if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1950         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1951         // Emit calculation of the linear step.
1952         EmitIgnoredExpr(CS);
1953       }
1954   }
1955   return HasLinears;
1956 }
1957 
1958 void CodeGenFunction::EmitOMPLinearClauseFinal(
1959     const OMPLoopDirective &D,
1960     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1961   if (!HaveInsertPoint())
1962     return;
1963   llvm::BasicBlock *DoneBB = nullptr;
1964   // Emit the final values of the linear variables.
1965   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1966     auto IC = C->varlist_begin();
1967     for (const Expr *F : C->finals()) {
1968       if (!DoneBB) {
1969         if (llvm::Value *Cond = CondGen(*this)) {
1970           // If the first post-update expression is found, emit conditional
1971           // block if it was requested.
1972           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
1973           DoneBB = createBasicBlock(".omp.linear.pu.done");
1974           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1975           EmitBlock(ThenBB);
1976         }
1977       }
1978       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1979       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1980                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1981                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1982       Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
1983       CodeGenFunction::OMPPrivateScope VarScope(*this);
1984       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1985       (void)VarScope.Privatize();
1986       EmitIgnoredExpr(F);
1987       ++IC;
1988     }
1989     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1990       EmitIgnoredExpr(PostUpdate);
1991   }
1992   if (DoneBB)
1993     EmitBlock(DoneBB, /*IsFinished=*/true);
1994 }
1995 
1996 static void emitAlignedClause(CodeGenFunction &CGF,
1997                               const OMPExecutableDirective &D) {
1998   if (!CGF.HaveInsertPoint())
1999     return;
2000   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2001     llvm::APInt ClauseAlignment(64, 0);
2002     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2003       auto *AlignmentCI =
2004           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2005       ClauseAlignment = AlignmentCI->getValue();
2006     }
2007     for (const Expr *E : Clause->varlists()) {
2008       llvm::APInt Alignment(ClauseAlignment);
2009       if (Alignment == 0) {
2010         // OpenMP [2.8.1, Description]
2011         // If no optional parameter is specified, implementation-defined default
2012         // alignments for SIMD instructions on the target platforms are assumed.
2013         Alignment =
2014             CGF.getContext()
2015                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2016                     E->getType()->getPointeeType()))
2017                 .getQuantity();
2018       }
2019       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2020              "alignment is not power of 2");
2021       if (Alignment != 0) {
2022         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2023         CGF.emitAlignmentAssumption(
2024             PtrValue, E, /*No second loc needed*/ SourceLocation(),
2025             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2026       }
2027     }
2028   }
2029 }
2030 
2031 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2032     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2033   if (!HaveInsertPoint())
2034     return;
2035   auto I = S.private_counters().begin();
2036   for (const Expr *E : S.counters()) {
2037     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2038     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2039     // Emit var without initialization.
2040     AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2041     EmitAutoVarCleanups(VarEmission);
2042     LocalDeclMap.erase(PrivateVD);
2043     (void)LoopScope.addPrivate(VD, [&VarEmission]() {
2044       return VarEmission.getAllocatedAddress();
2045     });
2046     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2047         VD->hasGlobalStorage()) {
2048       (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2049         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2050                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2051                         E->getType(), VK_LValue, E->getExprLoc());
2052         return EmitLValue(&DRE).getAddress(*this);
2053       });
2054     } else {
2055       (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2056         return VarEmission.getAllocatedAddress();
2057       });
2058     }
2059     ++I;
2060   }
2061   // Privatize extra loop counters used in loops for ordered(n) clauses.
2062   for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2063     if (!C->getNumForLoops())
2064       continue;
2065     for (unsigned I = S.getCollapsedNumber(),
2066                   E = C->getLoopNumIterations().size();
2067          I < E; ++I) {
2068       const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2069       const auto *VD = cast<VarDecl>(DRE->getDecl());
2070       // Override only those variables that can be captured to avoid re-emission
2071       // of the variables declared within the loops.
2072       if (DRE->refersToEnclosingVariableOrCapture()) {
2073         (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2074           return CreateMemTemp(DRE->getType(), VD->getName());
2075         });
2076       }
2077     }
2078   }
2079 }
2080 
2081 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2082                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
2083                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2084   if (!CGF.HaveInsertPoint())
2085     return;
2086   {
2087     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2088     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2089     (void)PreCondScope.Privatize();
2090     // Get initial values of real counters.
2091     for (const Expr *I : S.inits()) {
2092       CGF.EmitIgnoredExpr(I);
2093     }
2094   }
2095   // Create temp loop control variables with their init values to support
2096   // non-rectangular loops.
2097   CodeGenFunction::OMPMapVars PreCondVars;
2098   for (const Expr * E: S.dependent_counters()) {
2099     if (!E)
2100       continue;
2101     assert(!E->getType().getNonReferenceType()->isRecordType() &&
2102            "dependent counter must not be an iterator.");
2103     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2104     Address CounterAddr =
2105         CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2106     (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2107   }
2108   (void)PreCondVars.apply(CGF);
2109   for (const Expr *E : S.dependent_inits()) {
2110     if (!E)
2111       continue;
2112     CGF.EmitIgnoredExpr(E);
2113   }
2114   // Check that loop is executed at least one time.
2115   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2116   PreCondVars.restore(CGF);
2117 }
2118 
2119 void CodeGenFunction::EmitOMPLinearClause(
2120     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2121   if (!HaveInsertPoint())
2122     return;
2123   llvm::DenseSet<const VarDecl *> SIMDLCVs;
2124   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2125     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2126     for (const Expr *C : LoopDirective->counters()) {
2127       SIMDLCVs.insert(
2128           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2129     }
2130   }
2131   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2132     auto CurPrivate = C->privates().begin();
2133     for (const Expr *E : C->varlists()) {
2134       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2135       const auto *PrivateVD =
2136           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2137       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2138         bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2139           // Emit private VarDecl with copy init.
2140           EmitVarDecl(*PrivateVD);
2141           return GetAddrOfLocalVar(PrivateVD);
2142         });
2143         assert(IsRegistered && "linear var already registered as private");
2144         // Silence the warning about unused variable.
2145         (void)IsRegistered;
2146       } else {
2147         EmitVarDecl(*PrivateVD);
2148       }
2149       ++CurPrivate;
2150     }
2151   }
2152 }
2153 
2154 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2155                                      const OMPExecutableDirective &D,
2156                                      bool IsMonotonic) {
2157   if (!CGF.HaveInsertPoint())
2158     return;
2159   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2160     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2161                                  /*ignoreResult=*/true);
2162     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2163     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2164     // In presence of finite 'safelen', it may be unsafe to mark all
2165     // the memory instructions parallel, because loop-carried
2166     // dependences of 'safelen' iterations are possible.
2167     if (!IsMonotonic)
2168       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2169   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2170     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2171                                  /*ignoreResult=*/true);
2172     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2173     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2174     // In presence of finite 'safelen', it may be unsafe to mark all
2175     // the memory instructions parallel, because loop-carried
2176     // dependences of 'safelen' iterations are possible.
2177     CGF.LoopStack.setParallel(/*Enable=*/false);
2178   }
2179 }
2180 
2181 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2182                                       bool IsMonotonic) {
2183   // Walk clauses and process safelen/lastprivate.
2184   LoopStack.setParallel(!IsMonotonic);
2185   LoopStack.setVectorizeEnable();
2186   emitSimdlenSafelenClause(*this, D, IsMonotonic);
2187   if (const auto *C = D.getSingleClause<OMPOrderClause>())
2188     if (C->getKind() == OMPC_ORDER_concurrent)
2189       LoopStack.setParallel(/*Enable=*/true);
2190   if ((D.getDirectiveKind() == OMPD_simd ||
2191        (getLangOpts().OpenMPSimd &&
2192         isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2193       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2194                    [](const OMPReductionClause *C) {
2195                      return C->getModifier() == OMPC_REDUCTION_inscan;
2196                    }))
2197     // Disable parallel access in case of prefix sum.
2198     LoopStack.setParallel(/*Enable=*/false);
2199 }
2200 
2201 void CodeGenFunction::EmitOMPSimdFinal(
2202     const OMPLoopDirective &D,
2203     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2204   if (!HaveInsertPoint())
2205     return;
2206   llvm::BasicBlock *DoneBB = nullptr;
2207   auto IC = D.counters().begin();
2208   auto IPC = D.private_counters().begin();
2209   for (const Expr *F : D.finals()) {
2210     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2211     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2212     const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2213     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2214         OrigVD->hasGlobalStorage() || CED) {
2215       if (!DoneBB) {
2216         if (llvm::Value *Cond = CondGen(*this)) {
2217           // If the first post-update expression is found, emit conditional
2218           // block if it was requested.
2219           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2220           DoneBB = createBasicBlock(".omp.final.done");
2221           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2222           EmitBlock(ThenBB);
2223         }
2224       }
2225       Address OrigAddr = Address::invalid();
2226       if (CED) {
2227         OrigAddr =
2228             EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2229       } else {
2230         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2231                         /*RefersToEnclosingVariableOrCapture=*/false,
2232                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2233         OrigAddr = EmitLValue(&DRE).getAddress(*this);
2234       }
2235       OMPPrivateScope VarScope(*this);
2236       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2237       (void)VarScope.Privatize();
2238       EmitIgnoredExpr(F);
2239     }
2240     ++IC;
2241     ++IPC;
2242   }
2243   if (DoneBB)
2244     EmitBlock(DoneBB, /*IsFinished=*/true);
2245 }
2246 
2247 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2248                                          const OMPLoopDirective &S,
2249                                          CodeGenFunction::JumpDest LoopExit) {
2250   CGF.EmitOMPLoopBody(S, LoopExit);
2251   CGF.EmitStopPoint(&S);
2252 }
2253 
2254 /// Emit a helper variable and return corresponding lvalue.
2255 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2256                                const DeclRefExpr *Helper) {
2257   auto VDecl = cast<VarDecl>(Helper->getDecl());
2258   CGF.EmitVarDecl(*VDecl);
2259   return CGF.EmitLValue(Helper);
2260 }
2261 
2262 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2263                                const RegionCodeGenTy &SimdInitGen,
2264                                const RegionCodeGenTy &BodyCodeGen) {
2265   auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2266                                                     PrePostActionTy &) {
2267     CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2268     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2269     SimdInitGen(CGF);
2270 
2271     BodyCodeGen(CGF);
2272   };
2273   auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2274     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2275     CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2276 
2277     BodyCodeGen(CGF);
2278   };
2279   const Expr *IfCond = nullptr;
2280   if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2281     for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2282       if (CGF.getLangOpts().OpenMP >= 50 &&
2283           (C->getNameModifier() == OMPD_unknown ||
2284            C->getNameModifier() == OMPD_simd)) {
2285         IfCond = C->getCondition();
2286         break;
2287       }
2288     }
2289   }
2290   if (IfCond) {
2291     CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2292   } else {
2293     RegionCodeGenTy ThenRCG(ThenGen);
2294     ThenRCG(CGF);
2295   }
2296 }
2297 
2298 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2299                               PrePostActionTy &Action) {
2300   Action.Enter(CGF);
2301   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2302          "Expected simd directive");
2303   OMPLoopScope PreInitScope(CGF, S);
2304   // if (PreCond) {
2305   //   for (IV in 0..LastIteration) BODY;
2306   //   <Final counter/linear vars updates>;
2307   // }
2308   //
2309   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2310       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2311       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2312     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2313     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2314   }
2315 
2316   // Emit: if (PreCond) - begin.
2317   // If the condition constant folds and can be elided, avoid emitting the
2318   // whole loop.
2319   bool CondConstant;
2320   llvm::BasicBlock *ContBlock = nullptr;
2321   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2322     if (!CondConstant)
2323       return;
2324   } else {
2325     llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2326     ContBlock = CGF.createBasicBlock("simd.if.end");
2327     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2328                 CGF.getProfileCount(&S));
2329     CGF.EmitBlock(ThenBlock);
2330     CGF.incrementProfileCounter(&S);
2331   }
2332 
2333   // Emit the loop iteration variable.
2334   const Expr *IVExpr = S.getIterationVariable();
2335   const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2336   CGF.EmitVarDecl(*IVDecl);
2337   CGF.EmitIgnoredExpr(S.getInit());
2338 
2339   // Emit the iterations count variable.
2340   // If it is not a variable, Sema decided to calculate iterations count on
2341   // each iteration (e.g., it is foldable into a constant).
2342   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2343     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2344     // Emit calculation of the iterations count.
2345     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2346   }
2347 
2348   emitAlignedClause(CGF, S);
2349   (void)CGF.EmitOMPLinearClauseInit(S);
2350   {
2351     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2352     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2353     CGF.EmitOMPLinearClause(S, LoopScope);
2354     CGF.EmitOMPPrivateClause(S, LoopScope);
2355     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2356     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2357         CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2358     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2359     (void)LoopScope.Privatize();
2360     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2361       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2362 
2363     emitCommonSimdLoop(
2364         CGF, S,
2365         [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2366           CGF.EmitOMPSimdInit(S);
2367         },
2368         [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2369           CGF.EmitOMPInnerLoop(
2370               S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2371               [&S](CodeGenFunction &CGF) {
2372                 emitOMPLoopBodyWithStopPoint(CGF, S,
2373                                              CodeGenFunction::JumpDest());
2374               },
2375               [](CodeGenFunction &) {});
2376         });
2377     CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2378     // Emit final copy of the lastprivate variables at the end of loops.
2379     if (HasLastprivateClause)
2380       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2381     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2382     emitPostUpdateForReductionClause(CGF, S,
2383                                      [](CodeGenFunction &) { return nullptr; });
2384   }
2385   CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2386   // Emit: if (PreCond) - end.
2387   if (ContBlock) {
2388     CGF.EmitBranch(ContBlock);
2389     CGF.EmitBlock(ContBlock, true);
2390   }
2391 }
2392 
2393 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2394   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2395   OMPFirstScanLoop = true;
2396   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2397     emitOMPSimdRegion(CGF, S, Action);
2398   };
2399   {
2400     auto LPCRegion =
2401         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2402     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2403     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2404   }
2405   // Check for outer lastprivate conditional update.
2406   checkForLastprivateConditionalUpdate(*this, S);
2407 }
2408 
2409 void CodeGenFunction::EmitOMPOuterLoop(
2410     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2411     CodeGenFunction::OMPPrivateScope &LoopScope,
2412     const CodeGenFunction::OMPLoopArguments &LoopArgs,
2413     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2414     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2415   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2416 
2417   const Expr *IVExpr = S.getIterationVariable();
2418   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2419   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2420 
2421   JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2422 
2423   // Start the loop with a block that tests the condition.
2424   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2425   EmitBlock(CondBlock);
2426   const SourceRange R = S.getSourceRange();
2427   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2428                  SourceLocToDebugLoc(R.getEnd()));
2429 
2430   llvm::Value *BoolCondVal = nullptr;
2431   if (!DynamicOrOrdered) {
2432     // UB = min(UB, GlobalUB) or
2433     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2434     // 'distribute parallel for')
2435     EmitIgnoredExpr(LoopArgs.EUB);
2436     // IV = LB
2437     EmitIgnoredExpr(LoopArgs.Init);
2438     // IV < UB
2439     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2440   } else {
2441     BoolCondVal =
2442         RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2443                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2444   }
2445 
2446   // If there are any cleanups between here and the loop-exit scope,
2447   // create a block to stage a loop exit along.
2448   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2449   if (LoopScope.requiresCleanups())
2450     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2451 
2452   llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2453   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2454   if (ExitBlock != LoopExit.getBlock()) {
2455     EmitBlock(ExitBlock);
2456     EmitBranchThroughCleanup(LoopExit);
2457   }
2458   EmitBlock(LoopBody);
2459 
2460   // Emit "IV = LB" (in case of static schedule, we have already calculated new
2461   // LB for loop condition and emitted it above).
2462   if (DynamicOrOrdered)
2463     EmitIgnoredExpr(LoopArgs.Init);
2464 
2465   // Create a block for the increment.
2466   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2467   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2468 
2469   emitCommonSimdLoop(
2470       *this, S,
2471       [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2472         // Generate !llvm.loop.parallel metadata for loads and stores for loops
2473         // with dynamic/guided scheduling and without ordered clause.
2474         if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2475           CGF.LoopStack.setParallel(!IsMonotonic);
2476           if (const auto *C = S.getSingleClause<OMPOrderClause>())
2477             if (C->getKind() == OMPC_ORDER_concurrent)
2478               CGF.LoopStack.setParallel(/*Enable=*/true);
2479         } else {
2480           CGF.EmitOMPSimdInit(S, IsMonotonic);
2481         }
2482       },
2483       [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2484        &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2485         SourceLocation Loc = S.getBeginLoc();
2486         // when 'distribute' is not combined with a 'for':
2487         // while (idx <= UB) { BODY; ++idx; }
2488         // when 'distribute' is combined with a 'for'
2489         // (e.g. 'distribute parallel for')
2490         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2491         CGF.EmitOMPInnerLoop(
2492             S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2493             [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2494               CodeGenLoop(CGF, S, LoopExit);
2495             },
2496             [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2497               CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2498             });
2499       });
2500 
2501   EmitBlock(Continue.getBlock());
2502   BreakContinueStack.pop_back();
2503   if (!DynamicOrOrdered) {
2504     // Emit "LB = LB + Stride", "UB = UB + Stride".
2505     EmitIgnoredExpr(LoopArgs.NextLB);
2506     EmitIgnoredExpr(LoopArgs.NextUB);
2507   }
2508 
2509   EmitBranch(CondBlock);
2510   LoopStack.pop();
2511   // Emit the fall-through block.
2512   EmitBlock(LoopExit.getBlock());
2513 
2514   // Tell the runtime we are done.
2515   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2516     if (!DynamicOrOrdered)
2517       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2518                                                      S.getDirectiveKind());
2519   };
2520   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2521 }
2522 
2523 void CodeGenFunction::EmitOMPForOuterLoop(
2524     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2525     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2526     const OMPLoopArguments &LoopArgs,
2527     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2528   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2529 
2530   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2531   const bool DynamicOrOrdered =
2532       Ordered || RT.isDynamic(ScheduleKind.Schedule);
2533 
2534   assert((Ordered ||
2535           !RT.isStaticNonchunked(ScheduleKind.Schedule,
2536                                  LoopArgs.Chunk != nullptr)) &&
2537          "static non-chunked schedule does not need outer loop");
2538 
2539   // Emit outer loop.
2540   //
2541   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2542   // When schedule(dynamic,chunk_size) is specified, the iterations are
2543   // distributed to threads in the team in chunks as the threads request them.
2544   // Each thread executes a chunk of iterations, then requests another chunk,
2545   // until no chunks remain to be distributed. Each chunk contains chunk_size
2546   // iterations, except for the last chunk to be distributed, which may have
2547   // fewer iterations. When no chunk_size is specified, it defaults to 1.
2548   //
2549   // When schedule(guided,chunk_size) is specified, the iterations are assigned
2550   // to threads in the team in chunks as the executing threads request them.
2551   // Each thread executes a chunk of iterations, then requests another chunk,
2552   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2553   // each chunk is proportional to the number of unassigned iterations divided
2554   // by the number of threads in the team, decreasing to 1. For a chunk_size
2555   // with value k (greater than 1), the size of each chunk is determined in the
2556   // same way, with the restriction that the chunks do not contain fewer than k
2557   // iterations (except for the last chunk to be assigned, which may have fewer
2558   // than k iterations).
2559   //
2560   // When schedule(auto) is specified, the decision regarding scheduling is
2561   // delegated to the compiler and/or runtime system. The programmer gives the
2562   // implementation the freedom to choose any possible mapping of iterations to
2563   // threads in the team.
2564   //
2565   // When schedule(runtime) is specified, the decision regarding scheduling is
2566   // deferred until run time, and the schedule and chunk size are taken from the
2567   // run-sched-var ICV. If the ICV is set to auto, the schedule is
2568   // implementation defined
2569   //
2570   // while(__kmpc_dispatch_next(&LB, &UB)) {
2571   //   idx = LB;
2572   //   while (idx <= UB) { BODY; ++idx;
2573   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2574   //   } // inner loop
2575   // }
2576   //
2577   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2578   // When schedule(static, chunk_size) is specified, iterations are divided into
2579   // chunks of size chunk_size, and the chunks are assigned to the threads in
2580   // the team in a round-robin fashion in the order of the thread number.
2581   //
2582   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2583   //   while (idx <= UB) { BODY; ++idx; } // inner loop
2584   //   LB = LB + ST;
2585   //   UB = UB + ST;
2586   // }
2587   //
2588 
2589   const Expr *IVExpr = S.getIterationVariable();
2590   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2591   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2592 
2593   if (DynamicOrOrdered) {
2594     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2595         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2596     llvm::Value *LBVal = DispatchBounds.first;
2597     llvm::Value *UBVal = DispatchBounds.second;
2598     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2599                                                              LoopArgs.Chunk};
2600     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2601                            IVSigned, Ordered, DipatchRTInputValues);
2602   } else {
2603     CGOpenMPRuntime::StaticRTInput StaticInit(
2604         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2605         LoopArgs.ST, LoopArgs.Chunk);
2606     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2607                          ScheduleKind, StaticInit);
2608   }
2609 
2610   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2611                                     const unsigned IVSize,
2612                                     const bool IVSigned) {
2613     if (Ordered) {
2614       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2615                                                             IVSigned);
2616     }
2617   };
2618 
2619   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2620                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2621   OuterLoopArgs.IncExpr = S.getInc();
2622   OuterLoopArgs.Init = S.getInit();
2623   OuterLoopArgs.Cond = S.getCond();
2624   OuterLoopArgs.NextLB = S.getNextLowerBound();
2625   OuterLoopArgs.NextUB = S.getNextUpperBound();
2626   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2627                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2628 }
2629 
2630 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2631                              const unsigned IVSize, const bool IVSigned) {}
2632 
2633 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2634     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2635     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2636     const CodeGenLoopTy &CodeGenLoopContent) {
2637 
2638   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2639 
2640   // Emit outer loop.
2641   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2642   // dynamic
2643   //
2644 
2645   const Expr *IVExpr = S.getIterationVariable();
2646   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2647   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2648 
2649   CGOpenMPRuntime::StaticRTInput StaticInit(
2650       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2651       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2652   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2653 
2654   // for combined 'distribute' and 'for' the increment expression of distribute
2655   // is stored in DistInc. For 'distribute' alone, it is in Inc.
2656   Expr *IncExpr;
2657   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2658     IncExpr = S.getDistInc();
2659   else
2660     IncExpr = S.getInc();
2661 
2662   // this routine is shared by 'omp distribute parallel for' and
2663   // 'omp distribute': select the right EUB expression depending on the
2664   // directive
2665   OMPLoopArguments OuterLoopArgs;
2666   OuterLoopArgs.LB = LoopArgs.LB;
2667   OuterLoopArgs.UB = LoopArgs.UB;
2668   OuterLoopArgs.ST = LoopArgs.ST;
2669   OuterLoopArgs.IL = LoopArgs.IL;
2670   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2671   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2672                           ? S.getCombinedEnsureUpperBound()
2673                           : S.getEnsureUpperBound();
2674   OuterLoopArgs.IncExpr = IncExpr;
2675   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2676                            ? S.getCombinedInit()
2677                            : S.getInit();
2678   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2679                            ? S.getCombinedCond()
2680                            : S.getCond();
2681   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2682                              ? S.getCombinedNextLowerBound()
2683                              : S.getNextLowerBound();
2684   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2685                              ? S.getCombinedNextUpperBound()
2686                              : S.getNextUpperBound();
2687 
2688   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2689                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2690                    emitEmptyOrdered);
2691 }
2692 
2693 static std::pair<LValue, LValue>
2694 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2695                                      const OMPExecutableDirective &S) {
2696   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2697   LValue LB =
2698       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2699   LValue UB =
2700       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2701 
2702   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2703   // parallel for') we need to use the 'distribute'
2704   // chunk lower and upper bounds rather than the whole loop iteration
2705   // space. These are parameters to the outlined function for 'parallel'
2706   // and we copy the bounds of the previous schedule into the
2707   // the current ones.
2708   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2709   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2710   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2711       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2712   PrevLBVal = CGF.EmitScalarConversion(
2713       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2714       LS.getIterationVariable()->getType(),
2715       LS.getPrevLowerBoundVariable()->getExprLoc());
2716   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2717       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2718   PrevUBVal = CGF.EmitScalarConversion(
2719       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2720       LS.getIterationVariable()->getType(),
2721       LS.getPrevUpperBoundVariable()->getExprLoc());
2722 
2723   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2724   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2725 
2726   return {LB, UB};
2727 }
2728 
2729 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2730 /// we need to use the LB and UB expressions generated by the worksharing
2731 /// code generation support, whereas in non combined situations we would
2732 /// just emit 0 and the LastIteration expression
2733 /// This function is necessary due to the difference of the LB and UB
2734 /// types for the RT emission routines for 'for_static_init' and
2735 /// 'for_dispatch_init'
2736 static std::pair<llvm::Value *, llvm::Value *>
2737 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2738                                         const OMPExecutableDirective &S,
2739                                         Address LB, Address UB) {
2740   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2741   const Expr *IVExpr = LS.getIterationVariable();
2742   // when implementing a dynamic schedule for a 'for' combined with a
2743   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2744   // is not normalized as each team only executes its own assigned
2745   // distribute chunk
2746   QualType IteratorTy = IVExpr->getType();
2747   llvm::Value *LBVal =
2748       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2749   llvm::Value *UBVal =
2750       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2751   return {LBVal, UBVal};
2752 }
2753 
2754 static void emitDistributeParallelForDistributeInnerBoundParams(
2755     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2756     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2757   const auto &Dir = cast<OMPLoopDirective>(S);
2758   LValue LB =
2759       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2760   llvm::Value *LBCast =
2761       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2762                                 CGF.SizeTy, /*isSigned=*/false);
2763   CapturedVars.push_back(LBCast);
2764   LValue UB =
2765       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2766 
2767   llvm::Value *UBCast =
2768       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2769                                 CGF.SizeTy, /*isSigned=*/false);
2770   CapturedVars.push_back(UBCast);
2771 }
2772 
2773 static void
2774 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2775                                  const OMPLoopDirective &S,
2776                                  CodeGenFunction::JumpDest LoopExit) {
2777   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2778                                          PrePostActionTy &Action) {
2779     Action.Enter(CGF);
2780     bool HasCancel = false;
2781     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2782       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2783         HasCancel = D->hasCancel();
2784       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2785         HasCancel = D->hasCancel();
2786       else if (const auto *D =
2787                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2788         HasCancel = D->hasCancel();
2789     }
2790     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2791                                                      HasCancel);
2792     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2793                                emitDistributeParallelForInnerBounds,
2794                                emitDistributeParallelForDispatchBounds);
2795   };
2796 
2797   emitCommonOMPParallelDirective(
2798       CGF, S,
2799       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2800       CGInlinedWorksharingLoop,
2801       emitDistributeParallelForDistributeInnerBoundParams);
2802 }
2803 
2804 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2805     const OMPDistributeParallelForDirective &S) {
2806   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2807     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2808                               S.getDistInc());
2809   };
2810   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2811   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2812 }
2813 
2814 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2815     const OMPDistributeParallelForSimdDirective &S) {
2816   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2817     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2818                               S.getDistInc());
2819   };
2820   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2821   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2822 }
2823 
2824 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2825     const OMPDistributeSimdDirective &S) {
2826   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2827     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2828   };
2829   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2830   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2831 }
2832 
2833 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2834     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2835   // Emit SPMD target parallel for region as a standalone region.
2836   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2837     emitOMPSimdRegion(CGF, S, Action);
2838   };
2839   llvm::Function *Fn;
2840   llvm::Constant *Addr;
2841   // Emit target region as a standalone region.
2842   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2843       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2844   assert(Fn && Addr && "Target device function emission failed.");
2845 }
2846 
2847 void CodeGenFunction::EmitOMPTargetSimdDirective(
2848     const OMPTargetSimdDirective &S) {
2849   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2850     emitOMPSimdRegion(CGF, S, Action);
2851   };
2852   emitCommonOMPTargetDirective(*this, S, CodeGen);
2853 }
2854 
2855 namespace {
2856   struct ScheduleKindModifiersTy {
2857     OpenMPScheduleClauseKind Kind;
2858     OpenMPScheduleClauseModifier M1;
2859     OpenMPScheduleClauseModifier M2;
2860     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2861                             OpenMPScheduleClauseModifier M1,
2862                             OpenMPScheduleClauseModifier M2)
2863         : Kind(Kind), M1(M1), M2(M2) {}
2864   };
2865 } // namespace
2866 
2867 bool CodeGenFunction::EmitOMPWorksharingLoop(
2868     const OMPLoopDirective &S, Expr *EUB,
2869     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2870     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2871   // Emit the loop iteration variable.
2872   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2873   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
2874   EmitVarDecl(*IVDecl);
2875 
2876   // Emit the iterations count variable.
2877   // If it is not a variable, Sema decided to calculate iterations count on each
2878   // iteration (e.g., it is foldable into a constant).
2879   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2880     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2881     // Emit calculation of the iterations count.
2882     EmitIgnoredExpr(S.getCalcLastIteration());
2883   }
2884 
2885   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2886 
2887   bool HasLastprivateClause;
2888   // Check pre-condition.
2889   {
2890     OMPLoopScope PreInitScope(*this, S);
2891     // Skip the entire loop if we don't meet the precondition.
2892     // If the condition constant folds and can be elided, avoid emitting the
2893     // whole loop.
2894     bool CondConstant;
2895     llvm::BasicBlock *ContBlock = nullptr;
2896     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2897       if (!CondConstant)
2898         return false;
2899     } else {
2900       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
2901       ContBlock = createBasicBlock("omp.precond.end");
2902       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2903                   getProfileCount(&S));
2904       EmitBlock(ThenBlock);
2905       incrementProfileCounter(&S);
2906     }
2907 
2908     RunCleanupsScope DoacrossCleanupScope(*this);
2909     bool Ordered = false;
2910     if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2911       if (OrderedClause->getNumForLoops())
2912         RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
2913       else
2914         Ordered = true;
2915     }
2916 
2917     llvm::DenseSet<const Expr *> EmittedFinals;
2918     emitAlignedClause(*this, S);
2919     bool HasLinears = EmitOMPLinearClauseInit(S);
2920     // Emit helper vars inits.
2921 
2922     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2923     LValue LB = Bounds.first;
2924     LValue UB = Bounds.second;
2925     LValue ST =
2926         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2927     LValue IL =
2928         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2929 
2930     // Emit 'then' code.
2931     {
2932       OMPPrivateScope LoopScope(*this);
2933       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2934         // Emit implicit barrier to synchronize threads and avoid data races on
2935         // initialization of firstprivate variables and post-update of
2936         // lastprivate variables.
2937         CGM.getOpenMPRuntime().emitBarrierCall(
2938             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
2939             /*ForceSimpleCall=*/true);
2940       }
2941       EmitOMPPrivateClause(S, LoopScope);
2942       CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2943           *this, S, EmitLValue(S.getIterationVariable()));
2944       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2945       EmitOMPReductionClauseInit(S, LoopScope);
2946       EmitOMPPrivateLoopCounters(S, LoopScope);
2947       EmitOMPLinearClause(S, LoopScope);
2948       (void)LoopScope.Privatize();
2949       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2950         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
2951 
2952       // Detect the loop schedule kind and chunk.
2953       const Expr *ChunkExpr = nullptr;
2954       OpenMPScheduleTy ScheduleKind;
2955       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
2956         ScheduleKind.Schedule = C->getScheduleKind();
2957         ScheduleKind.M1 = C->getFirstScheduleModifier();
2958         ScheduleKind.M2 = C->getSecondScheduleModifier();
2959         ChunkExpr = C->getChunkSize();
2960       } else {
2961         // Default behaviour for schedule clause.
2962         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
2963             *this, S, ScheduleKind.Schedule, ChunkExpr);
2964       }
2965       bool HasChunkSizeOne = false;
2966       llvm::Value *Chunk = nullptr;
2967       if (ChunkExpr) {
2968         Chunk = EmitScalarExpr(ChunkExpr);
2969         Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
2970                                      S.getIterationVariable()->getType(),
2971                                      S.getBeginLoc());
2972         Expr::EvalResult Result;
2973         if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
2974           llvm::APSInt EvaluatedChunk = Result.Val.getInt();
2975           HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
2976         }
2977       }
2978       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2979       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2980       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2981       // If the static schedule kind is specified or if the ordered clause is
2982       // specified, and if no monotonic modifier is specified, the effect will
2983       // be as if the monotonic modifier was specified.
2984       bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
2985           /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
2986           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
2987       bool IsMonotonic =
2988           Ordered ||
2989           ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2990             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
2991            !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
2992              ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
2993           ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2994           ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2995       if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
2996                                  /* Chunked */ Chunk != nullptr) ||
2997            StaticChunkedOne) &&
2998           !Ordered) {
2999         JumpDest LoopExit =
3000             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3001         emitCommonSimdLoop(
3002             *this, S,
3003             [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
3004               if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3005                 CGF.EmitOMPSimdInit(S, IsMonotonic);
3006               } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3007                 if (C->getKind() == OMPC_ORDER_concurrent)
3008                   CGF.LoopStack.setParallel(/*Enable=*/true);
3009               }
3010             },
3011             [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3012              &S, ScheduleKind, LoopExit,
3013              &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3014               // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3015               // When no chunk_size is specified, the iteration space is divided
3016               // into chunks that are approximately equal in size, and at most
3017               // one chunk is distributed to each thread. Note that the size of
3018               // the chunks is unspecified in this case.
3019               CGOpenMPRuntime::StaticRTInput StaticInit(
3020                   IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3021                   LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3022                   StaticChunkedOne ? Chunk : nullptr);
3023               CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3024                   CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3025                   StaticInit);
3026               // UB = min(UB, GlobalUB);
3027               if (!StaticChunkedOne)
3028                 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3029               // IV = LB;
3030               CGF.EmitIgnoredExpr(S.getInit());
3031               // For unchunked static schedule generate:
3032               //
3033               // while (idx <= UB) {
3034               //   BODY;
3035               //   ++idx;
3036               // }
3037               //
3038               // For static schedule with chunk one:
3039               //
3040               // while (IV <= PrevUB) {
3041               //   BODY;
3042               //   IV += ST;
3043               // }
3044               CGF.EmitOMPInnerLoop(
3045                   S, LoopScope.requiresCleanups(),
3046                   StaticChunkedOne ? S.getCombinedParForInDistCond()
3047                                    : S.getCond(),
3048                   StaticChunkedOne ? S.getDistInc() : S.getInc(),
3049                   [&S, LoopExit](CodeGenFunction &CGF) {
3050                     emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3051                   },
3052                   [](CodeGenFunction &) {});
3053             });
3054         EmitBlock(LoopExit.getBlock());
3055         // Tell the runtime we are done.
3056         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3057           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3058                                                          S.getDirectiveKind());
3059         };
3060         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3061       } else {
3062         // Emit the outer loop, which requests its work chunk [LB..UB] from
3063         // runtime and runs the inner loop to process it.
3064         const OMPLoopArguments LoopArguments(
3065             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3066             IL.getAddress(*this), Chunk, EUB);
3067         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3068                             LoopArguments, CGDispatchBounds);
3069       }
3070       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3071         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3072           return CGF.Builder.CreateIsNotNull(
3073               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3074         });
3075       }
3076       EmitOMPReductionClauseFinal(
3077           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3078                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3079                  : /*Parallel only*/ OMPD_parallel);
3080       // Emit post-update of the reduction variables if IsLastIter != 0.
3081       emitPostUpdateForReductionClause(
3082           *this, S, [IL, &S](CodeGenFunction &CGF) {
3083             return CGF.Builder.CreateIsNotNull(
3084                 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3085           });
3086       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3087       if (HasLastprivateClause)
3088         EmitOMPLastprivateClauseFinal(
3089             S, isOpenMPSimdDirective(S.getDirectiveKind()),
3090             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3091     }
3092     EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3093       return CGF.Builder.CreateIsNotNull(
3094           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3095     });
3096     DoacrossCleanupScope.ForceCleanup();
3097     // We're now done with the loop, so jump to the continuation block.
3098     if (ContBlock) {
3099       EmitBranch(ContBlock);
3100       EmitBlock(ContBlock, /*IsFinished=*/true);
3101     }
3102   }
3103   return HasLastprivateClause;
3104 }
3105 
3106 /// The following two functions generate expressions for the loop lower
3107 /// and upper bounds in case of static and dynamic (dispatch) schedule
3108 /// of the associated 'for' or 'distribute' loop.
3109 static std::pair<LValue, LValue>
3110 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3111   const auto &LS = cast<OMPLoopDirective>(S);
3112   LValue LB =
3113       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3114   LValue UB =
3115       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3116   return {LB, UB};
3117 }
3118 
3119 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3120 /// consider the lower and upper bound expressions generated by the
3121 /// worksharing loop support, but we use 0 and the iteration space size as
3122 /// constants
3123 static std::pair<llvm::Value *, llvm::Value *>
3124 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3125                           Address LB, Address UB) {
3126   const auto &LS = cast<OMPLoopDirective>(S);
3127   const Expr *IVExpr = LS.getIterationVariable();
3128   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3129   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3130   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3131   return {LBVal, UBVal};
3132 }
3133 
3134 /// Emits the code for the directive with inscan reductions.
3135 /// The code is the following:
3136 /// \code
3137 /// size num_iters = <num_iters>;
3138 /// <type> buffer[num_iters];
3139 /// #pragma omp ...
3140 /// for (i: 0..<num_iters>) {
3141 ///   <input phase>;
3142 ///   buffer[i] = red;
3143 /// }
3144 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3145 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3146 ///   buffer[cnt] op= buffer[cnt-pow(2,k)];
3147 /// #pragma omp ...
3148 /// for (0..<num_iters>) {
3149 ///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3150 ///   <scan phase>;
3151 /// }
3152 /// \endcode
     ///
     /// \param CGF Function to emit into.
     /// \param S Loop directive carrying the inscan reduction clauses.
     /// \param NumIteratorsGen Emits the iteration count; the result is cast
     /// (unsigned) to size_t and used as the temp buffer dimension.
     /// \param FirstGen Emits the first loop (input phase); called while
     /// CGF.OMPFirstScanLoop is true.
     /// \param SecondGen Emits the second loop (scan phase); called after
     /// CGF.OMPFirstScanLoop has been reset to false.
3153 static void emitScanBasedDirective(
3154     CodeGenFunction &CGF, const OMPLoopDirective &S,
3155     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3156     llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3157     llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3158   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3159       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
       // Flatten the per-clause expression lists of all reduction clauses into
       // parallel arrays.
3160   SmallVector<const Expr *, 4> Shareds;
3161   SmallVector<const Expr *, 4> Privates;
3162   SmallVector<const Expr *, 4> ReductionOps;
3163   SmallVector<const Expr *, 4> LHSs;
3164   SmallVector<const Expr *, 4> RHSs;
3165   SmallVector<const Expr *, 4> CopyOps;
3166   SmallVector<const Expr *, 4> CopyArrayTemps;
3167   SmallVector<const Expr *, 4> CopyArrayElems;
3168   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3169     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3170            "Only inscan reductions are expected.");
3171     Shareds.append(C->varlist_begin(), C->varlist_end());
3172     Privates.append(C->privates().begin(), C->privates().end());
3173     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3174     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3175     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3176     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3177     CopyArrayTemps.append(C->copy_array_temps().begin(),
3178                           C->copy_array_temps().end());
3179     CopyArrayElems.append(C->copy_array_elems().begin(),
3180                           C->copy_array_elems().end());
3181   }
3182   {
3183     // Emit buffers for each reduction variables.
3184     // ReductionCodeGen is required to emit correctly the code for array
3185     // reductions.
3186     ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3187     unsigned Count = 0;
3188     auto *ITA = CopyArrayTemps.begin();
3189     for (const Expr *IRef : Privates) {
3190       const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3191       // Emit variably modified arrays, used for arrays/array sections
3192       // reductions.
3193       if (PrivateVD->getType()->isVariablyModifiedType()) {
3194         RedCG.emitSharedOrigLValue(CGF, Count);
3195         RedCG.emitAggregateType(CGF, Count);
3196       }
           // Bind the opaque size expression of the temp buffer's variable
           // array type to the iteration count while the buffer is emitted.
3197       CodeGenFunction::OpaqueValueMapping DimMapping(
3198           CGF,
3199           cast<OpaqueValueExpr>(
3200               cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3201                   ->getSizeExpr()),
3202           RValue::get(OMPScanNumIterations));
3203       // Emit temp buffer.
3204       CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3205       ++ITA;
3206       ++Count;
3207     }
3208   }
3209   CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3210   {
3211     // Emit loop with input phase:
3212     // #pragma omp ...
3213     // for (i: 0..<num_iters>) {
3214     //   <input phase>;
3215     //   buffer[i] = red;
3216     // }
3217     CGF.OMPFirstScanLoop = true;
3218     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3219     FirstGen(CGF);
3220   }
3221   // Emit prefix reduction:
3222   // for (int k = 0; k != ceil(log2(n)); ++k)
       // The loop is emitted in do-while form: control falls from the current
       // block straight into the body, and the exit test (Next != LogVal, with
       // Next = k + 1) is only evaluated at the end of each pass.
       // NOTE(review): with that shape a LogVal of 0 (e.g. n == 1) is never
       // matched by Next on the first pass - confirm this cannot be reached
       // with fewer than two iterations, or guard the loop entry.
3223   llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3224   llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3225   llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
       // LogVal = (unsigned)ceil(log2((double)n)), computed with the log2 and
       // ceil intrinsics on double.
3226   llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3227   llvm::Value *Arg =
3228       CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3229   llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3230   F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3231   LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3232   LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
       // NMin1 = n - 1 is the starting index of every inner pass.
3233   llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3234       OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3235   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3236   CGF.EmitBlock(LoopBB);
       // int k = 0;
3237   auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3238   // size pow2k = 1;
3239   auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3240   Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3241   Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3242   // for (size i = n - 1; i >= pow2k; --i)
3243   //   tmp[i] op= tmp[i-pow2k];
3244   llvm::BasicBlock *InnerLoopBB =
3245       CGF.createBasicBlock("omp.inner.log.scan.body");
3246   llvm::BasicBlock *InnerExitBB =
3247       CGF.createBasicBlock("omp.inner.log.scan.exit");
       // Skip the inner loop entirely when n - 1 < pow2k.
3248   llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3249   CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3250   CGF.EmitBlock(InnerLoopBB);
3251   auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3252   IVal->addIncoming(NMin1, LoopBB);
3253   {
         // Point each reduction's LHS variable at tmp[i] and its RHS variable
         // at tmp[i - pow2k], then emit the reduction over those addresses.
3254     CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3255     auto *ILHS = LHSs.begin();
3256     auto *IRHS = RHSs.begin();
3257     for (const Expr *CopyArrayElem : CopyArrayElems) {
3258       const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3259       const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3260       Address LHSAddr = Address::invalid();
3261       {
             // Evaluate the element expression with its index bound to i.
3262         CodeGenFunction::OpaqueValueMapping IdxMapping(
3263             CGF,
3264             cast<OpaqueValueExpr>(
3265                 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3266             RValue::get(IVal));
3267         LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3268       }
3269       PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3270       Address RHSAddr = Address::invalid();
3271       {
             // Same element expression, index bound to i - pow2k.
3272         llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3273         CodeGenFunction::OpaqueValueMapping IdxMapping(
3274             CGF,
3275             cast<OpaqueValueExpr>(
3276                 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3277             RValue::get(OffsetIVal));
3278         RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3279       }
3280       PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3281       ++ILHS;
3282       ++IRHS;
3283     }
3284     PrivScope.Privatize();
3285     CGF.CGM.getOpenMPRuntime().emitReduction(
3286         CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3287         {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3288   }
       // --i; repeat the inner body while i >= pow2k.
3289   llvm::Value *NextIVal =
3290       CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3291   IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3292   CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3293   CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3294   CGF.EmitBlock(InnerExitBB);
       // ++k;
3295   llvm::Value *Next =
3296       CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3297   Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3298   // pow2k <<= 1;
3299   llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3300   Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
       // Leave the outer loop once the incremented counter equals LogVal.
3301   llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3302   CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3303   auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3304   CGF.EmitBlock(ExitBB);
3305
       // Emit the second loop (scan phase).
3306   CGF.OMPFirstScanLoop = false;
3307   SecondGen(CGF);
3308 }
3309 
3310 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3311                                      const OMPLoopDirective &S,
3312                                      bool HasCancel) {
3313   bool HasLastprivates;
3314   if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3315                    [](const OMPReductionClause *C) {
3316                      return C->getModifier() == OMPC_REDUCTION_inscan;
3317                    })) {
3318     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3319       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3320       OMPLoopScope LoopScope(CGF, S);
3321       return CGF.EmitScalarExpr(S.getNumIterations());
3322     };
3323     const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3324       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3325           CGF, S.getDirectiveKind(), HasCancel);
3326       (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3327                                        emitForLoopBounds,
3328                                        emitDispatchForLoopBounds);
3329       // Emit an implicit barrier at the end.
3330       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3331                                                  OMPD_for);
3332     };
3333     const auto &&SecondGen = [&S, HasCancel,
3334                               &HasLastprivates](CodeGenFunction &CGF) {
3335       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3336           CGF, S.getDirectiveKind(), HasCancel);
3337       HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3338                                                    emitForLoopBounds,
3339                                                    emitDispatchForLoopBounds);
3340     };
3341     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3342   } else {
3343     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3344                                                      HasCancel);
3345     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3346                                                  emitForLoopBounds,
3347                                                  emitDispatchForLoopBounds);
3348   }
3349   return HasLastprivates;
3350 }
3351 
3352 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3353   bool HasLastprivates = false;
3354   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3355                                           PrePostActionTy &) {
3356     HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3357   };
3358   {
3359     auto LPCRegion =
3360         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3361     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3362     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3363                                                 S.hasCancel());
3364   }
3365 
3366   // Emit an implicit barrier at the end.
3367   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3368     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3369   // Check for outer lastprivate conditional update.
3370   checkForLastprivateConditionalUpdate(*this, S);
3371 }
3372 
3373 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3374   bool HasLastprivates = false;
3375   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3376                                           PrePostActionTy &) {
3377     HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3378   };
3379   {
3380     auto LPCRegion =
3381         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3382     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3383     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3384   }
3385 
3386   // Emit an implicit barrier at the end.
3387   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3388     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3389   // Check for outer lastprivate conditional update.
3390   checkForLastprivateConditionalUpdate(*this, S);
3391 }
3392 
3393 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3394                                 const Twine &Name,
3395                                 llvm::Value *Init = nullptr) {
3396   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3397   if (Init)
3398     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3399   return LVal;
3400 }
3401 
/// Emit the body of a 'sections' region for \p S as a statically scheduled
/// loop over section indices whose body switches on the loop counter.
///
/// When the captured statement is a CompoundStmt, each of its children becomes
/// one switch case; otherwise the whole captured statement is emitted as the
/// single case 0. Firstprivate, private, lastprivate and reduction clauses of
/// \p S are emitted around the loop. Used for both 'sections' and
/// 'parallel sections' (the only two directive kinds queried for cancellation
/// below).
3402 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3403   const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3404   const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3405   bool HasLastprivates = false;
3406   auto &&CodeGen = [&S, CapturedStmt, CS,
3407                     &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3408     const ASTContext &C = CGF.getContext();
3409     QualType KmpInt32Ty =
3410         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3411     // Emit helper variables: lower bound, upper bound, stride and is-last flag.
3412     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3413                                   CGF.Builder.getInt32(0));
3414     llvm::ConstantInt *GlobalUBVal = CS != nullptr
3415                                          ? CGF.Builder.getInt32(CS->size() - 1)
3416                                          : CGF.Builder.getInt32(0);
3417     LValue UB =
3418         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3419     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3420                                   CGF.Builder.getInt32(1));
3421     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3422                                   CGF.Builder.getInt32(0));
3423     // Loop counter (left uninitialized here; it is set to LB before the loop).
3424     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3425     OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3426     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3427     OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3428     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3429     // Generate condition for loop: IV <= UB.
3430     BinaryOperator *Cond = BinaryOperator::Create(
3431         C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
3432         S.getBeginLoc(), FPOptionsOverride());
3433     // Increment for loop counter: ++IV.
3434     UnaryOperator *Inc = UnaryOperator::Create(
3435         C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
3436         S.getBeginLoc(), true, FPOptionsOverride());
3437     auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3438       // Iterate through all sections and emit a switch construct:
3439       // switch (IV) {
3440       //   case 0:
3441       //     <SectionStmt[0]>;
3442       //     break;
3443       // ...
3444       //   case <NumSection> - 1:
3445       //     <SectionStmt[<NumSection> - 1]>;
3446       //     break;
3447       // }
3448       // .omp.sections.exit:
3449       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3450       llvm::SwitchInst *SwitchStmt =
3451           CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3452                                    ExitBB, CS == nullptr ? 1 : CS->size());
3453       if (CS) {
3454         unsigned CaseNumber = 0;
3455         for (const Stmt *SubStmt : CS->children()) {
3456           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3457           CGF.EmitBlock(CaseBB);
3458           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3459           CGF.EmitStmt(SubStmt);
3460           CGF.EmitBranch(ExitBB);
3461           ++CaseNumber;
3462         }
3463       } else {
3464         llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3465         CGF.EmitBlock(CaseBB);
3466         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3467         CGF.EmitStmt(CapturedStmt);
3468         CGF.EmitBranch(ExitBB);
3469       }
3470       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3471     };
3472 
3473     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3474     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3475       // Emit implicit barrier to synchronize threads and avoid data races on
3476       // initialization of firstprivate variables and post-update of lastprivate
3477       // variables.
3478       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3479           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3480           /*ForceSimpleCall=*/true);
3481     }
3482     CGF.EmitOMPPrivateClause(S, LoopScope);
3483     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3484     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3485     CGF.EmitOMPReductionClauseInit(S, LoopScope);
3486     (void)LoopScope.Privatize();
3487     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3488       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3489 
3490     // Emit static non-chunked loop.
3491     OpenMPScheduleTy ScheduleKind;
3492     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3493     CGOpenMPRuntime::StaticRTInput StaticInit(
3494         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3495         LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3496     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3497         CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3498     // UB = min(UB, GlobalUB);
3499     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3500     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3501         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3502     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3503     // IV = LB;
3504     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3505     // while (IV <= UB) { BODY; ++IV; }
3506     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3507                          [](CodeGenFunction &) {});
3508     // Tell the runtime we are done.
3509     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3510       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3511                                                      S.getDirectiveKind());
3512     };
3513     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3514     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3515     // Emit post-update of the reduction variables if IsLastIter != 0.
3516     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3517       return CGF.Builder.CreateIsNotNull(
3518           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3519     });
3520 
3521     // Emit final copy of the lastprivate variables if IsLastIter != 0.
3522     if (HasLastprivates)
3523       CGF.EmitOMPLastprivateClauseFinal(
3524           S, /*NoFinals=*/false,
3525           CGF.Builder.CreateIsNotNull(
3526               CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3527   };
3528 
3529   bool HasCancel = false;
3530   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3531     HasCancel = OSD->hasCancel();
3532   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3533     HasCancel = OPSD->hasCancel();
3534   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3535   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3536                                               HasCancel);
3537   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3538   // clause. Otherwise the barrier will be generated by the codegen for the
3539   // directive.
3540   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3541     // Emit implicit barrier to synchronize threads and avoid data races on
3542     // the final copy-out of lastprivate variables.
3543     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3544                                            OMPD_unknown);
3545   }
3546 }
3547 
3548 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3549   {
3550     auto LPCRegion =
3551         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3552     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3553     EmitSections(S);
3554   }
3555   // Emit an implicit barrier at the end.
3556   if (!S.getSingleClause<OMPNowaitClause>()) {
3557     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3558                                            OMPD_sections);
3559   }
3560   // Check for outer lastprivate conditional update.
3561   checkForLastprivateConditionalUpdate(*this, S);
3562 }
3563 
3564 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3565   LexicalScope Scope(*this, S.getSourceRange());
3566   EmitStopPoint(&S);
3567   EmitStmt(S.getAssociatedStmt());
3568 }
3569 
3570 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3571   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3572   llvm::SmallVector<const Expr *, 8> DestExprs;
3573   llvm::SmallVector<const Expr *, 8> SrcExprs;
3574   llvm::SmallVector<const Expr *, 8> AssignmentOps;
3575   // Check if there are any 'copyprivate' clauses associated with this
3576   // 'single' construct.
3577   // Build a list of copyprivate variables along with helper expressions
3578   // (<source>, <destination>, <destination>=<source> expressions)
3579   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3580     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3581     DestExprs.append(C->destination_exprs().begin(),
3582                      C->destination_exprs().end());
3583     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3584     AssignmentOps.append(C->assignment_ops().begin(),
3585                          C->assignment_ops().end());
3586   }
3587   // Emit code for 'single' region along with 'copyprivate' clauses
3588   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3589     Action.Enter(CGF);
3590     OMPPrivateScope SingleScope(CGF);
3591     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3592     CGF.EmitOMPPrivateClause(S, SingleScope);
3593     (void)SingleScope.Privatize();
3594     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3595   };
3596   {
3597     auto LPCRegion =
3598         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3599     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3600     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3601                                             CopyprivateVars, DestExprs,
3602                                             SrcExprs, AssignmentOps);
3603   }
3604   // Emit an implicit barrier at the end (to avoid data race on firstprivate
3605   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
3606   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3607     CGM.getOpenMPRuntime().emitBarrierCall(
3608         *this, S.getBeginLoc(),
3609         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3610   }
3611   // Check for outer lastprivate conditional update.
3612   checkForLastprivateConditionalUpdate(*this, S);
3613 }
3614 
3615 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3616   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3617     Action.Enter(CGF);
3618     CGF.EmitStmt(S.getRawStmt());
3619   };
3620   CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3621 }
3622 
3623 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3624   if (CGM.getLangOpts().OpenMPIRBuilder) {
3625     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3626     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3627 
3628     const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3629 
3630     auto FiniCB = [this](InsertPointTy IP) {
3631       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3632     };
3633 
3634     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3635                                                   InsertPointTy CodeGenIP,
3636                                                   llvm::BasicBlock &FiniBB) {
3637       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3638       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3639                                              CodeGenIP, FiniBB);
3640     };
3641 
3642     LexicalScope Scope(*this, S.getSourceRange());
3643     EmitStopPoint(&S);
3644     Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
3645 
3646     return;
3647   }
3648   LexicalScope Scope(*this, S.getSourceRange());
3649   EmitStopPoint(&S);
3650   emitMaster(*this, S);
3651 }
3652 
3653 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3654   if (CGM.getLangOpts().OpenMPIRBuilder) {
3655     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3656     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3657 
3658     const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
3659     const Expr *Hint = nullptr;
3660     if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3661       Hint = HintClause->getHint();
3662 
3663     // TODO: This is slightly different from what's currently being done in
3664     // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
3665     // about typing is final.
3666     llvm::Value *HintInst = nullptr;
3667     if (Hint)
3668       HintInst =
3669           Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
3670 
3671     auto FiniCB = [this](InsertPointTy IP) {
3672       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3673     };
3674 
3675     auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
3676                                                     InsertPointTy CodeGenIP,
3677                                                     llvm::BasicBlock &FiniBB) {
3678       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3679       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
3680                                              CodeGenIP, FiniBB);
3681     };
3682 
3683     LexicalScope Scope(*this, S.getSourceRange());
3684     EmitStopPoint(&S);
3685     Builder.restoreIP(OMPBuilder.createCritical(
3686         Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
3687         HintInst));
3688 
3689     return;
3690   }
3691 
3692   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3693     Action.Enter(CGF);
3694     CGF.EmitStmt(S.getAssociatedStmt());
3695   };
3696   const Expr *Hint = nullptr;
3697   if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3698     Hint = HintClause->getHint();
3699   LexicalScope Scope(*this, S.getSourceRange());
3700   EmitStopPoint(&S);
3701   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3702                                             S.getDirectiveName().getAsString(),
3703                                             CodeGen, S.getBeginLoc(), Hint);
3704 }
3705 
3706 void CodeGenFunction::EmitOMPParallelForDirective(
3707     const OMPParallelForDirective &S) {
3708   // Emit directive as a combined directive that consists of two implicit
3709   // directives: 'parallel' with 'for' directive.
3710   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3711     Action.Enter(CGF);
3712     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
3713   };
3714   {
3715     auto LPCRegion =
3716         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3717     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3718                                    emitEmptyBoundParameters);
3719   }
3720   // Check for outer lastprivate conditional update.
3721   checkForLastprivateConditionalUpdate(*this, S);
3722 }
3723 
3724 void CodeGenFunction::EmitOMPParallelForSimdDirective(
3725     const OMPParallelForSimdDirective &S) {
3726   // Emit directive as a combined directive that consists of two implicit
3727   // directives: 'parallel' with 'for' directive.
3728   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3729     Action.Enter(CGF);
3730     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3731   };
3732   {
3733     auto LPCRegion =
3734         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3735     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
3736                                    emitEmptyBoundParameters);
3737   }
3738   // Check for outer lastprivate conditional update.
3739   checkForLastprivateConditionalUpdate(*this, S);
3740 }
3741 
3742 void CodeGenFunction::EmitOMPParallelMasterDirective(
3743     const OMPParallelMasterDirective &S) {
3744   // Emit directive as a combined directive that consists of two implicit
3745   // directives: 'parallel' with 'master' directive.
3746   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3747     Action.Enter(CGF);
3748     OMPPrivateScope PrivateScope(CGF);
3749     bool Copyins = CGF.EmitOMPCopyinClause(S);
3750     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3751     if (Copyins) {
3752       // Emit implicit barrier to synchronize threads and avoid data races on
3753       // propagation master's thread values of threadprivate variables to local
3754       // instances of that variables of all other implicit threads.
3755       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3756           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3757           /*ForceSimpleCall=*/true);
3758     }
3759     CGF.EmitOMPPrivateClause(S, PrivateScope);
3760     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3761     (void)PrivateScope.Privatize();
3762     emitMaster(CGF, S);
3763     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3764   };
3765   {
3766     auto LPCRegion =
3767         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3768     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
3769                                    emitEmptyBoundParameters);
3770     emitPostUpdateForReductionClause(*this, S,
3771                                      [](CodeGenFunction &) { return nullptr; });
3772   }
3773   // Check for outer lastprivate conditional update.
3774   checkForLastprivateConditionalUpdate(*this, S);
3775 }
3776 
3777 void CodeGenFunction::EmitOMPParallelSectionsDirective(
3778     const OMPParallelSectionsDirective &S) {
3779   // Emit directive as a combined directive that consists of two implicit
3780   // directives: 'parallel' with 'sections' directive.
3781   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3782     Action.Enter(CGF);
3783     CGF.EmitSections(S);
3784   };
3785   {
3786     auto LPCRegion =
3787         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3788     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
3789                                    emitEmptyBoundParameters);
3790   }
3791   // Check for outer lastprivate conditional update.
3792   checkForLastprivateConditionalUpdate(*this, S);
3793 }
3794 
3795 namespace {
3796 /// Get the list of variables declared in the context of the untied tasks.
3797 class CheckVarsEscapingUntiedTaskDeclContext final
3798     : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
3799   llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
3800 
3801 public:
3802   explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
3803   virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
3804   void VisitDeclStmt(const DeclStmt *S) {
3805     if (!S)
3806       return;
3807     // Need to privatize only local vars, static locals can be processed as is.
3808     for (const Decl *D : S->decls()) {
3809       if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
3810         if (VD->hasLocalStorage())
3811           PrivateDecls.push_back(VD);
3812     }
3813   }
3814   void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
3815   void VisitCapturedStmt(const CapturedStmt *) { return; }
3816   void VisitLambdaExpr(const LambdaExpr *) { return; }
3817   void VisitBlockExpr(const BlockExpr *) { return; }
3818   void VisitStmt(const Stmt *S) {
3819     if (!S)
3820       return;
3821     for (const Stmt *Child : S->children())
3822       if (Child)
3823         Visit(Child);
3824   }
3825 
3826   /// Swaps list of vars with the provided one.
3827   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
3828 };
3829 } // anonymous namespace
3830 
3831 void CodeGenFunction::EmitOMPTaskBasedDirective(
3832     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
3833     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
3834     OMPTaskDataTy &Data) {
3835   // Emit outlined function for task construct.
3836   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
3837   auto I = CS->getCapturedDecl()->param_begin();
3838   auto PartId = std::next(I);
3839   auto TaskT = std::next(I, 4);
3840   // Check if the task is final
3841   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
3842     // If the condition constant folds and can be elided, try to avoid emitting
3843     // the condition and the dead arm of the if/else.
3844     const Expr *Cond = Clause->getCondition();
3845     bool CondConstant;
3846     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
3847       Data.Final.setInt(CondConstant);
3848     else
3849       Data.Final.setPointer(EvaluateExprAsBool(Cond));
3850   } else {
3851     // By default the task is not final.
3852     Data.Final.setInt(/*IntVal=*/false);
3853   }
3854   // Check if the task has 'priority' clause.
3855   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
3856     const Expr *Prio = Clause->getPriority();
3857     Data.Priority.setInt(/*IntVal=*/true);
3858     Data.Priority.setPointer(EmitScalarConversion(
3859         EmitScalarExpr(Prio), Prio->getType(),
3860         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
3861         Prio->getExprLoc()));
3862   }
3863   // The first function argument for tasks is a thread id, the second one is a
3864   // part id (0 for tied tasks, >=0 for untied task).
3865   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
3866   // Get list of private variables.
3867   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
3868     auto IRef = C->varlist_begin();
3869     for (const Expr *IInit : C->private_copies()) {
3870       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3871       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3872         Data.PrivateVars.push_back(*IRef);
3873         Data.PrivateCopies.push_back(IInit);
3874       }
3875       ++IRef;
3876     }
3877   }
3878   EmittedAsPrivate.clear();
3879   // Get list of firstprivate variables.
3880   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3881     auto IRef = C->varlist_begin();
3882     auto IElemInitRef = C->inits().begin();
3883     for (const Expr *IInit : C->private_copies()) {
3884       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3885       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3886         Data.FirstprivateVars.push_back(*IRef);
3887         Data.FirstprivateCopies.push_back(IInit);
3888         Data.FirstprivateInits.push_back(*IElemInitRef);
3889       }
3890       ++IRef;
3891       ++IElemInitRef;
3892     }
3893   }
3894   // Get list of lastprivate variables (for taskloops).
3895   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
3896   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
3897     auto IRef = C->varlist_begin();
3898     auto ID = C->destination_exprs().begin();
3899     for (const Expr *IInit : C->private_copies()) {
3900       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3901       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3902         Data.LastprivateVars.push_back(*IRef);
3903         Data.LastprivateCopies.push_back(IInit);
3904       }
3905       LastprivateDstsOrigs.insert(
3906           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
3907            cast<DeclRefExpr>(*IRef)});
3908       ++IRef;
3909       ++ID;
3910     }
3911   }
3912   SmallVector<const Expr *, 4> LHSs;
3913   SmallVector<const Expr *, 4> RHSs;
3914   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3915     Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
3916     Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
3917     Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
3918     Data.ReductionOps.append(C->reduction_ops().begin(),
3919                              C->reduction_ops().end());
3920     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3921     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3922   }
3923   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
3924       *this, S.getBeginLoc(), LHSs, RHSs, Data);
3925   // Build list of dependences.
3926   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
3927     OMPTaskDataTy::DependData &DD =
3928         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
3929     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
3930   }
3931   // Get list of local vars for untied tasks.
3932   if (!Data.Tied) {
3933     CheckVarsEscapingUntiedTaskDeclContext Checker;
3934     Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
3935     Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
3936                               Checker.getPrivateDecls().end());
3937   }
3938   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
3939                     CapturedRegion](CodeGenFunction &CGF,
3940                                     PrePostActionTy &Action) {
3941     llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
3942         UntiedLocalVars;
3943     // Set proper addresses for generated private copies.
3944     OMPPrivateScope Scope(CGF);
3945     llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
3946     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3947         !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
3948       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3949           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3950       enum { PrivatesParam = 2, CopyFnParam = 3 };
3951       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3952           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3953       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3954           CS->getCapturedDecl()->getParam(PrivatesParam)));
3955       // Map privates.
3956       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3957       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3958       CallArgs.push_back(PrivatesPtr);
3959       for (const Expr *E : Data.PrivateVars) {
3960         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3961         Address PrivatePtr = CGF.CreateMemTemp(
3962             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3963         PrivatePtrs.emplace_back(VD, PrivatePtr);
3964         CallArgs.push_back(PrivatePtr.getPointer());
3965       }
3966       for (const Expr *E : Data.FirstprivateVars) {
3967         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3968         Address PrivatePtr =
3969             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3970                               ".firstpriv.ptr.addr");
3971         PrivatePtrs.emplace_back(VD, PrivatePtr);
3972         FirstprivatePtrs.emplace_back(VD, PrivatePtr);
3973         CallArgs.push_back(PrivatePtr.getPointer());
3974       }
3975       for (const Expr *E : Data.LastprivateVars) {
3976         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3977         Address PrivatePtr =
3978             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3979                               ".lastpriv.ptr.addr");
3980         PrivatePtrs.emplace_back(VD, PrivatePtr);
3981         CallArgs.push_back(PrivatePtr.getPointer());
3982       }
3983       for (const VarDecl *VD : Data.PrivateLocals) {
3984         QualType Ty = VD->getType().getNonReferenceType();
3985         if (VD->getType()->isLValueReferenceType())
3986           Ty = CGF.getContext().getPointerType(Ty);
3987         if (isAllocatableDecl(VD))
3988           Ty = CGF.getContext().getPointerType(Ty);
3989         Address PrivatePtr = CGF.CreateMemTemp(
3990             CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
3991         UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
3992         CallArgs.push_back(PrivatePtr.getPointer());
3993       }
3994       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3995           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3996       for (const auto &Pair : LastprivateDstsOrigs) {
3997         const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3998         DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
3999                         /*RefersToEnclosingVariableOrCapture=*/
4000                             CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4001                         Pair.second->getType(), VK_LValue,
4002                         Pair.second->getExprLoc());
4003         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4004           return CGF.EmitLValue(&DRE).getAddress(CGF);
4005         });
4006       }
4007       for (const auto &Pair : PrivatePtrs) {
4008         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4009                             CGF.getContext().getDeclAlign(Pair.first));
4010         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4011       }
4012       // Adjust mapping for internal locals by mapping actual memory instead of
4013       // a pointer to this memory.
4014       for (auto &Pair : UntiedLocalVars) {
4015         if (isAllocatableDecl(Pair.first)) {
4016           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4017           Address Replacement(Ptr, CGF.getPointerAlign());
4018           Pair.getSecond().first = Replacement;
4019           Ptr = CGF.Builder.CreateLoad(Replacement);
4020           Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4021           Pair.getSecond().second = Replacement;
4022         } else {
4023           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4024           Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4025           Pair.getSecond().first = Replacement;
4026         }
4027       }
4028     }
4029     if (Data.Reductions) {
4030       OMPPrivateScope FirstprivateScope(CGF);
4031       for (const auto &Pair : FirstprivatePtrs) {
4032         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4033                             CGF.getContext().getDeclAlign(Pair.first));
4034         FirstprivateScope.addPrivate(Pair.first,
4035                                      [Replacement]() { return Replacement; });
4036       }
4037       (void)FirstprivateScope.Privatize();
4038       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4039       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4040                              Data.ReductionCopies, Data.ReductionOps);
4041       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4042           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4043       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4044         RedCG.emitSharedOrigLValue(CGF, Cnt);
4045         RedCG.emitAggregateType(CGF, Cnt);
4046         // FIXME: This must removed once the runtime library is fixed.
4047         // Emit required threadprivate variables for
4048         // initializer/combiner/finalizer.
4049         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4050                                                            RedCG, Cnt);
4051         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4052             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4053         Replacement =
4054             Address(CGF.EmitScalarConversion(
4055                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4056                         CGF.getContext().getPointerType(
4057                             Data.ReductionCopies[Cnt]->getType()),
4058                         Data.ReductionCopies[Cnt]->getExprLoc()),
4059                     Replacement.getAlignment());
4060         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4061         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4062                          [Replacement]() { return Replacement; });
4063       }
4064     }
4065     // Privatize all private variables except for in_reduction items.
4066     (void)Scope.Privatize();
4067     SmallVector<const Expr *, 4> InRedVars;
4068     SmallVector<const Expr *, 4> InRedPrivs;
4069     SmallVector<const Expr *, 4> InRedOps;
4070     SmallVector<const Expr *, 4> TaskgroupDescriptors;
4071     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4072       auto IPriv = C->privates().begin();
4073       auto IRed = C->reduction_ops().begin();
4074       auto ITD = C->taskgroup_descriptors().begin();
4075       for (const Expr *Ref : C->varlists()) {
4076         InRedVars.emplace_back(Ref);
4077         InRedPrivs.emplace_back(*IPriv);
4078         InRedOps.emplace_back(*IRed);
4079         TaskgroupDescriptors.emplace_back(*ITD);
4080         std::advance(IPriv, 1);
4081         std::advance(IRed, 1);
4082         std::advance(ITD, 1);
4083       }
4084     }
4085     // Privatize in_reduction items here, because taskgroup descriptors must be
4086     // privatized earlier.
4087     OMPPrivateScope InRedScope(CGF);
4088     if (!InRedVars.empty()) {
4089       ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4090       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4091         RedCG.emitSharedOrigLValue(CGF, Cnt);
4092         RedCG.emitAggregateType(CGF, Cnt);
4093         // The taskgroup descriptor variable is always implicit firstprivate and
4094         // privatized already during processing of the firstprivates.
4095         // FIXME: This must removed once the runtime library is fixed.
4096         // Emit required threadprivate variables for
4097         // initializer/combiner/finalizer.
4098         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4099                                                            RedCG, Cnt);
4100         llvm::Value *ReductionsPtr;
4101         if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4102           ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4103                                                TRExpr->getExprLoc());
4104         } else {
4105           ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4106         }
4107         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4108             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4109         Replacement = Address(
4110             CGF.EmitScalarConversion(
4111                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4112                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4113                 InRedPrivs[Cnt]->getExprLoc()),
4114             Replacement.getAlignment());
4115         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4116         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4117                               [Replacement]() { return Replacement; });
4118       }
4119     }
4120     (void)InRedScope.Privatize();
4121 
4122     CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4123                                                              UntiedLocalVars);
4124     Action.Enter(CGF);
4125     BodyGen(CGF);
4126   };
4127   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4128       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4129       Data.NumberOfParts);
4130   OMPLexicalScope Scope(*this, S, llvm::None,
4131                         !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4132                             !isOpenMPSimdDirective(S.getDirectiveKind()));
4133   TaskGen(*this, OutlinedFn, Data);
4134 }
4135 
4136 static ImplicitParamDecl *
4137 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4138                                   QualType Ty, CapturedDecl *CD,
4139                                   SourceLocation Loc) {
4140   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4141                                            ImplicitParamDecl::Other);
4142   auto *OrigRef = DeclRefExpr::Create(
4143       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4144       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4145   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4146                                               ImplicitParamDecl::Other);
4147   auto *PrivateRef = DeclRefExpr::Create(
4148       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4149       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4150   QualType ElemType = C.getBaseElementType(Ty);
4151   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4152                                            ImplicitParamDecl::Other);
4153   auto *InitRef = DeclRefExpr::Create(
4154       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4155       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4156   PrivateVD->setInitStyle(VarDecl::CInit);
4157   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4158                                               InitRef, /*BasePath=*/nullptr,
4159                                               VK_RValue, FPOptionsOverride()));
4160   Data.FirstprivateVars.emplace_back(OrigRef);
4161   Data.FirstprivateCopies.emplace_back(PrivateRef);
4162   Data.FirstprivateInits.emplace_back(InitRef);
4163   return OrigVD;
4164 }
4165 
4166 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4167     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4168     OMPTargetDataInfo &InputInfo) {
4169   // Emit outlined function for task construct.
4170   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4171   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4172   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4173   auto I = CS->getCapturedDecl()->param_begin();
4174   auto PartId = std::next(I);
4175   auto TaskT = std::next(I, 4);
4176   OMPTaskDataTy Data;
4177   // The task is not final.
4178   Data.Final.setInt(/*IntVal=*/false);
4179   // Get list of firstprivate variables.
4180   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4181     auto IRef = C->varlist_begin();
4182     auto IElemInitRef = C->inits().begin();
4183     for (auto *IInit : C->private_copies()) {
4184       Data.FirstprivateVars.push_back(*IRef);
4185       Data.FirstprivateCopies.push_back(IInit);
4186       Data.FirstprivateInits.push_back(*IElemInitRef);
4187       ++IRef;
4188       ++IElemInitRef;
4189     }
4190   }
4191   OMPPrivateScope TargetScope(*this);
4192   VarDecl *BPVD = nullptr;
4193   VarDecl *PVD = nullptr;
4194   VarDecl *SVD = nullptr;
4195   VarDecl *MVD = nullptr;
4196   if (InputInfo.NumberOfTargetItems > 0) {
4197     auto *CD = CapturedDecl::Create(
4198         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4199     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4200     QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4201         getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4202         /*IndexTypeQuals=*/0);
4203     BPVD = createImplicitFirstprivateForType(
4204         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4205     PVD = createImplicitFirstprivateForType(
4206         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4207     QualType SizesType = getContext().getConstantArrayType(
4208         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4209         ArrSize, nullptr, ArrayType::Normal,
4210         /*IndexTypeQuals=*/0);
4211     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4212                                             S.getBeginLoc());
4213     TargetScope.addPrivate(
4214         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4215     TargetScope.addPrivate(PVD,
4216                            [&InputInfo]() { return InputInfo.PointersArray; });
4217     TargetScope.addPrivate(SVD,
4218                            [&InputInfo]() { return InputInfo.SizesArray; });
4219     // If there is no user-defined mapper, the mapper array will be nullptr. In
4220     // this case, we don't need to privatize it.
4221     if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4222             InputInfo.MappersArray.getPointer())) {
4223       MVD = createImplicitFirstprivateForType(
4224           getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4225       TargetScope.addPrivate(MVD,
4226                              [&InputInfo]() { return InputInfo.MappersArray; });
4227     }
4228   }
4229   (void)TargetScope.Privatize();
4230   // Build list of dependences.
4231   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4232     OMPTaskDataTy::DependData &DD =
4233         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4234     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4235   }
4236   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4237                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4238     // Set proper addresses for generated private copies.
4239     OMPPrivateScope Scope(CGF);
4240     if (!Data.FirstprivateVars.empty()) {
4241       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
4242           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
4243       enum { PrivatesParam = 2, CopyFnParam = 3 };
4244       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4245           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4246       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4247           CS->getCapturedDecl()->getParam(PrivatesParam)));
4248       // Map privates.
4249       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4250       llvm::SmallVector<llvm::Value *, 16> CallArgs;
4251       CallArgs.push_back(PrivatesPtr);
4252       for (const Expr *E : Data.FirstprivateVars) {
4253         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4254         Address PrivatePtr =
4255             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4256                               ".firstpriv.ptr.addr");
4257         PrivatePtrs.emplace_back(VD, PrivatePtr);
4258         CallArgs.push_back(PrivatePtr.getPointer());
4259       }
4260       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4261           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4262       for (const auto &Pair : PrivatePtrs) {
4263         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4264                             CGF.getContext().getDeclAlign(Pair.first));
4265         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4266       }
4267     }
4268     // Privatize all private variables except for in_reduction items.
4269     (void)Scope.Privatize();
4270     if (InputInfo.NumberOfTargetItems > 0) {
4271       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4272           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4273       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4274           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4275       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4276           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4277       // If MVD is nullptr, the mapper array is not privatized
4278       if (MVD)
4279         InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4280             CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4281     }
4282 
4283     Action.Enter(CGF);
4284     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4285     BodyGen(CGF);
4286   };
4287   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4288       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4289       Data.NumberOfParts);
4290   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4291   IntegerLiteral IfCond(getContext(), TrueOrFalse,
4292                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4293                         SourceLocation());
4294 
4295   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4296                                       SharedsTy, CapturedStruct, &IfCond, Data);
4297 }
4298 
4299 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4300   // Emit outlined function for task construct.
4301   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4302   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4303   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4304   const Expr *IfCond = nullptr;
4305   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4306     if (C->getNameModifier() == OMPD_unknown ||
4307         C->getNameModifier() == OMPD_task) {
4308       IfCond = C->getCondition();
4309       break;
4310     }
4311   }
4312 
4313   OMPTaskDataTy Data;
4314   // Check if we should emit tied or untied task.
4315   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4316   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4317     CGF.EmitStmt(CS->getCapturedStmt());
4318   };
4319   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4320                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4321                             const OMPTaskDataTy &Data) {
4322     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4323                                             SharedsTy, CapturedStruct, IfCond,
4324                                             Data);
4325   };
4326   auto LPCRegion =
4327       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4328   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4329 }
4330 
4331 void CodeGenFunction::EmitOMPTaskyieldDirective(
4332     const OMPTaskyieldDirective &S) {
4333   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4334 }
4335 
4336 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4337   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4338 }
4339 
4340 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4341   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4342 }
4343 
4344 void CodeGenFunction::EmitOMPTaskgroupDirective(
4345     const OMPTaskgroupDirective &S) {
4346   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4347     Action.Enter(CGF);
4348     if (const Expr *E = S.getReductionRef()) {
4349       SmallVector<const Expr *, 4> LHSs;
4350       SmallVector<const Expr *, 4> RHSs;
4351       OMPTaskDataTy Data;
4352       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4353         Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4354         Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4355         Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4356         Data.ReductionOps.append(C->reduction_ops().begin(),
4357                                  C->reduction_ops().end());
4358         LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4359         RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4360       }
4361       llvm::Value *ReductionDesc =
4362           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4363                                                            LHSs, RHSs, Data);
4364       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4365       CGF.EmitVarDecl(*VD);
4366       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4367                             /*Volatile=*/false, E->getType());
4368     }
4369     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4370   };
4371   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4372   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4373 }
4374 
4375 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4376   llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4377                                 ? llvm::AtomicOrdering::NotAtomic
4378                                 : llvm::AtomicOrdering::AcquireRelease;
4379   CGM.getOpenMPRuntime().emitFlush(
4380       *this,
4381       [&S]() -> ArrayRef<const Expr *> {
4382         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4383           return llvm::makeArrayRef(FlushClause->varlist_begin(),
4384                                     FlushClause->varlist_end());
4385         return llvm::None;
4386       }(),
4387       S.getBeginLoc(), AO);
4388 }
4389 
4390 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4391   const auto *DO = S.getSingleClause<OMPDepobjClause>();
4392   LValue DOLVal = EmitLValue(DO->getDepobj());
4393   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4394     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4395                                            DC->getModifier());
4396     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4397     Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4398         *this, Dependencies, DC->getBeginLoc());
4399     EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4400     return;
4401   }
4402   if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4403     CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4404     return;
4405   }
4406   if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4407     CGM.getOpenMPRuntime().emitUpdateClause(
4408         *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4409     return;
4410   }
4411 }
4412 
/// Emits code for a 'scan' directive nested in a loop with inscan reductions.
///
/// Nothing is emitted when no parent loop directive has been recorded in
/// OMPParentLoopDirectiveForScan. Otherwise the expressions of all inscan
/// reduction clauses of the parent directive are gathered and one of two
/// lowerings is used:
///  - parent is 'simd', or a simd-based directive in simd-only mode: the
///    reduction and the copy back into the private variable are emitted
///    in-loop, as sketched in the comment inside that branch;
///  - any other parent: in the first scan loop (OMPFirstScanLoop) the
///    reduction value is stored to the copy array at the current iteration
///    index; in the other loop it is read back from the copy array (from
///    index - 1 for an exclusive scan, skipping the copy at index 0).
4413 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4414   if (!OMPParentLoopDirectiveForScan)
4415     return;
4416   const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4417   bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4418   SmallVector<const Expr *, 4> Shareds;
4419   SmallVector<const Expr *, 4> Privates;
4420   SmallVector<const Expr *, 4> LHSs;
4421   SmallVector<const Expr *, 4> RHSs;
4422   SmallVector<const Expr *, 4> ReductionOps;
4423   SmallVector<const Expr *, 4> CopyOps;
4424   SmallVector<const Expr *, 4> CopyArrayTemps;
4425   SmallVector<const Expr *, 4> CopyArrayElems;
       // Collect the per-variable expressions of every reduction clause on the
       // parent directive that has the 'inscan' modifier; other reduction
       // clauses are ignored.
4426   for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4427     if (C->getModifier() != OMPC_REDUCTION_inscan)
4428       continue;
4429     Shareds.append(C->varlist_begin(), C->varlist_end());
4430     Privates.append(C->privates().begin(), C->privates().end());
4431     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4432     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4433     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4434     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4435     CopyArrayTemps.append(C->copy_array_temps().begin(),
4436                           C->copy_array_temps().end());
4437     CopyArrayElems.append(C->copy_array_elems().begin(),
4438                           C->copy_array_elems().end());
4439   }
4440   if (ParentDir.getDirectiveKind() == OMPD_simd ||
4441       (getLangOpts().OpenMPSimd &&
4442        isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4443     // For simd directive and simd-based directives in simd only mode, use the
4444     // following codegen:
4445     // int x = 0;
4446     // #pragma omp simd reduction(inscan, +: x)
4447     // for (..) {
4448     //   <first part>
4449     //   #pragma omp scan inclusive(x)
4450     //   <second part>
4451     //  }
4452     // is transformed to:
4453     // int x = 0;
4454     // for (..) {
4455     //   int x_priv = 0;
4456     //   <first part>
4457     //   x = x_priv + x;
4458     //   x_priv = x;
4459     //   <second part>
4460     // }
4461     // and
4462     // int x = 0;
4463     // #pragma omp simd reduction(inscan, +: x)
4464     // for (..) {
4465     //   <first part>
4466     //   #pragma omp scan exclusive(x)
4467     //   <second part>
4468     // }
4469     // to
4470     // int x = 0;
4471     // for (..) {
4472     //   int x_priv = 0;
4473     //   <second part>
4474     //   int temp = x;
4475     //   x = x_priv + x;
4476     //   x_priv = temp;
4477     //   <first part>
4478     // }
4479     llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
         // Leave the code emitted so far: to the reduce block for an inclusive
         // scan, to the loop's continue block for an exclusive one.
4480     EmitBranch(IsInclusive
4481                    ? OMPScanReduce
4482                    : BreakContinueStack.back().ContinueBlock.getBlock());
4483     EmitBlock(OMPScanDispatch);
4484     {
4485       // New scope for correct construction/destruction of temp variables for
4486       // exclusive scan.
4487       LexicalScope Scope(*this, S.getSourceRange());
4488       EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4489       EmitBlock(OMPScanReduce);
4490       if (!IsInclusive) {
4491         // Create temp var and copy LHS value to this temp value.
4492         // TMP = LHS;
4493         for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4494           const Expr *PrivateExpr = Privates[I];
4495           const Expr *TempExpr = CopyArrayTemps[I];
4496           EmitAutoVarDecl(
4497               *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4498           LValue DestLVal = EmitLValue(TempExpr);
4499           LValue SrcLVal = EmitLValue(LHSs[I]);
4500           EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4501                       SrcLVal.getAddress(*this),
4502                       cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4503                       cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4504                       CopyOps[I]);
4505         }
4506       }
4507       CGM.getOpenMPRuntime().emitReduction(
4508           *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4509           {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
             // After the reduction, copy into the RHS variable: from LHS for an
             // inclusive scan, from the temporary saved above for an exclusive
             // one.
4510       for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4511         const Expr *PrivateExpr = Privates[I];
4512         LValue DestLVal;
4513         LValue SrcLVal;
4514         if (IsInclusive) {
4515           DestLVal = EmitLValue(RHSs[I]);
4516           SrcLVal = EmitLValue(LHSs[I]);
4517         } else {
4518           const Expr *TempExpr = CopyArrayTemps[I];
4519           DestLVal = EmitLValue(RHSs[I]);
4520           SrcLVal = EmitLValue(TempExpr);
4521         }
4522         EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4523                     SrcLVal.getAddress(*this),
4524                     cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4525                     cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4526                     CopyOps[I]);
4527       }
4528     }
4529     EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4530     OMPScanExitBlock = IsInclusive
4531                            ? BreakContinueStack.back().ContinueBlock.getBlock()
4532                            : OMPScanReduce;
4533     EmitBlock(OMPAfterScanBlock);
4534     return;
4535   }
       // General path for every other parent directive (the simd case returned
       // above).
4536   if (!IsInclusive) {
4537     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4538     EmitBlock(OMPScanExitBlock);
4539   }
4540   if (OMPFirstScanLoop) {
4541     // Emit buffer[i] = red; at the end of the input phase.
4542     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4543                              .getIterationVariable()
4544                              ->IgnoreParenImpCasts();
4545     LValue IdxLVal = EmitLValue(IVExpr);
4546     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4547     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4548     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4549       const Expr *PrivateExpr = Privates[I];
4550       const Expr *OrigExpr = Shareds[I];
4551       const Expr *CopyArrayElem = CopyArrayElems[I];
             // Bind the opaque index of the copy-array element expression to the
             // current iteration index while the element lvalue is emitted.
4552       OpaqueValueMapping IdxMapping(
4553           *this,
4554           cast<OpaqueValueExpr>(
4555               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4556           RValue::get(IdxVal));
4557       LValue DestLVal = EmitLValue(CopyArrayElem);
4558       LValue SrcLVal = EmitLValue(OrigExpr);
4559       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4560                   SrcLVal.getAddress(*this),
4561                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4562                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4563                   CopyOps[I]);
4564     }
4565   }
4566   EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4567   if (IsInclusive) {
4568     EmitBlock(OMPScanExitBlock);
4569     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4570   }
4571   EmitBlock(OMPScanDispatch);
4572   if (!OMPFirstScanLoop) {
4573     // Emit red = buffer[i]; at the entrance to the scan phase.
4574     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4575                              .getIterationVariable()
4576                              ->IgnoreParenImpCasts();
4577     LValue IdxLVal = EmitLValue(IVExpr);
4578     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4579     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4580     llvm::BasicBlock *ExclusiveExitBB = nullptr;
4581     if (!IsInclusive) {
             // For an exclusive scan a zero index branches straight to the exit
             // block, so no copy is emitted on that path.
4582       llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
4583       ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
4584       llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
4585       Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
4586       EmitBlock(ContBB);
4587       // Use idx - 1 iteration for exclusive scan.
4588       IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
4589     }
4590     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4591       const Expr *PrivateExpr = Privates[I];
4592       const Expr *OrigExpr = Shareds[I];
4593       const Expr *CopyArrayElem = CopyArrayElems[I];
4594       OpaqueValueMapping IdxMapping(
4595           *this,
4596           cast<OpaqueValueExpr>(
4597               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4598           RValue::get(IdxVal));
4599       LValue SrcLVal = EmitLValue(CopyArrayElem);
4600       LValue DestLVal = EmitLValue(OrigExpr);
4601       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4602                   SrcLVal.getAddress(*this),
4603                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4604                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4605                   CopyOps[I]);
4606     }
4607     if (!IsInclusive) {
4608       EmitBlock(ExclusiveExitBB);
4609     }
4610   }
       // Continue with the 'before scan' block when OMPFirstScanLoop and
       // IsInclusive have the same value, otherwise with the 'after scan' block.
4611   EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
4612                                                : OMPAfterScanBlock);
4613   EmitBlock(OMPAfterScanBlock);
4614 }
4615 
4616 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
4617                                             const CodeGenLoopTy &CodeGenLoop,
4618                                             Expr *IncExpr) {
4619   // Emit the loop iteration variable.
4620   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
4621   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
4622   EmitVarDecl(*IVDecl);
4623 
4624   // Emit the iterations count variable.
4625   // If it is not a variable, Sema decided to calculate iterations count on each
4626   // iteration (e.g., it is foldable into a constant).
4627   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4628     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4629     // Emit calculation of the iterations count.
4630     EmitIgnoredExpr(S.getCalcLastIteration());
4631   }
4632 
4633   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
4634 
4635   bool HasLastprivateClause = false;
4636   // Check pre-condition.
4637   {
4638     OMPLoopScope PreInitScope(*this, S);
4639     // Skip the entire loop if we don't meet the precondition.
4640     // If the condition constant folds and can be elided, avoid emitting the
4641     // whole loop.
4642     bool CondConstant;
4643     llvm::BasicBlock *ContBlock = nullptr;
4644     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4645       if (!CondConstant)
4646         return;
4647     } else {
4648       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
4649       ContBlock = createBasicBlock("omp.precond.end");
4650       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
4651                   getProfileCount(&S));
4652       EmitBlock(ThenBlock);
4653       incrementProfileCounter(&S);
4654     }
4655 
4656     emitAlignedClause(*this, S);
4657     // Emit 'then' code.
4658     {
4659       // Emit helper vars inits.
4660 
4661       LValue LB = EmitOMPHelperVar(
4662           *this, cast<DeclRefExpr>(
4663                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4664                           ? S.getCombinedLowerBoundVariable()
4665                           : S.getLowerBoundVariable())));
4666       LValue UB = EmitOMPHelperVar(
4667           *this, cast<DeclRefExpr>(
4668                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4669                           ? S.getCombinedUpperBoundVariable()
4670                           : S.getUpperBoundVariable())));
4671       LValue ST =
4672           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
4673       LValue IL =
4674           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
4675 
4676       OMPPrivateScope LoopScope(*this);
4677       if (EmitOMPFirstprivateClause(S, LoopScope)) {
4678         // Emit implicit barrier to synchronize threads and avoid data races
4679         // on initialization of firstprivate variables and post-update of
4680         // lastprivate variables.
4681         CGM.getOpenMPRuntime().emitBarrierCall(
4682             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4683             /*ForceSimpleCall=*/true);
4684       }
4685       EmitOMPPrivateClause(S, LoopScope);
4686       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4687           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4688           !isOpenMPTeamsDirective(S.getDirectiveKind()))
4689         EmitOMPReductionClauseInit(S, LoopScope);
4690       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
4691       EmitOMPPrivateLoopCounters(S, LoopScope);
4692       (void)LoopScope.Privatize();
4693       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4694         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
4695 
4696       // Detect the distribute schedule kind and chunk.
4697       llvm::Value *Chunk = nullptr;
4698       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
4699       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
4700         ScheduleKind = C->getDistScheduleKind();
4701         if (const Expr *Ch = C->getChunkSize()) {
4702           Chunk = EmitScalarExpr(Ch);
4703           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
4704                                        S.getIterationVariable()->getType(),
4705                                        S.getBeginLoc());
4706         }
4707       } else {
4708         // Default behaviour for dist_schedule clause.
4709         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
4710             *this, S, ScheduleKind, Chunk);
4711       }
4712       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
4713       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
4714 
4715       // OpenMP [2.10.8, distribute Construct, Description]
4716       // If dist_schedule is specified, kind must be static. If specified,
4717       // iterations are divided into chunks of size chunk_size, chunks are
4718       // assigned to the teams of the league in a round-robin fashion in the
4719       // order of the team number. When no chunk_size is specified, the
4720       // iteration space is divided into chunks that are approximately equal
4721       // in size, and at most one chunk is distributed to each team of the
4722       // league. The size of the chunks is unspecified in this case.
4723       bool StaticChunked = RT.isStaticChunked(
4724           ScheduleKind, /* Chunked */ Chunk != nullptr) &&
4725           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
4726       if (RT.isStaticNonchunked(ScheduleKind,
4727                                 /* Chunked */ Chunk != nullptr) ||
4728           StaticChunked) {
4729         CGOpenMPRuntime::StaticRTInput StaticInit(
4730             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
4731             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4732             StaticChunked ? Chunk : nullptr);
4733         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
4734                                     StaticInit);
4735         JumpDest LoopExit =
4736             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
4737         // UB = min(UB, GlobalUB);
4738         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4739                             ? S.getCombinedEnsureUpperBound()
4740                             : S.getEnsureUpperBound());
4741         // IV = LB;
4742         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4743                             ? S.getCombinedInit()
4744                             : S.getInit());
4745 
4746         const Expr *Cond =
4747             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4748                 ? S.getCombinedCond()
4749                 : S.getCond();
4750 
4751         if (StaticChunked)
4752           Cond = S.getCombinedDistCond();
4753 
4754         // For static unchunked schedules generate:
4755         //
4756         //  1. For distribute alone, codegen
4757         //    while (idx <= UB) {
4758         //      BODY;
4759         //      ++idx;
4760         //    }
4761         //
4762         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
4763         //    while (idx <= UB) {
4764         //      <CodeGen rest of pragma>(LB, UB);
4765         //      idx += ST;
4766         //    }
4767         //
4768         // For static chunk one schedule generate:
4769         //
4770         // while (IV <= GlobalUB) {
4771         //   <CodeGen rest of pragma>(LB, UB);
4772         //   LB += ST;
4773         //   UB += ST;
4774         //   UB = min(UB, GlobalUB);
4775         //   IV = LB;
4776         // }
4777         //
4778         emitCommonSimdLoop(
4779             *this, S,
4780             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4781               if (isOpenMPSimdDirective(S.getDirectiveKind()))
4782                 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
4783             },
4784             [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
4785              StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
4786               CGF.EmitOMPInnerLoop(
4787                   S, LoopScope.requiresCleanups(), Cond, IncExpr,
4788                   [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
4789                     CodeGenLoop(CGF, S, LoopExit);
4790                   },
4791                   [&S, StaticChunked](CodeGenFunction &CGF) {
4792                     if (StaticChunked) {
4793                       CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
4794                       CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
4795                       CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
4796                       CGF.EmitIgnoredExpr(S.getCombinedInit());
4797                     }
4798                   });
4799             });
4800         EmitBlock(LoopExit.getBlock());
4801         // Tell the runtime we are done.
4802         RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
4803       } else {
4804         // Emit the outer loop, which requests its work chunk [LB..UB] from
4805         // runtime and runs the inner loop to process it.
4806         const OMPLoopArguments LoopArguments = {
4807             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4808             IL.getAddress(*this), Chunk};
4809         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
4810                                    CodeGenLoop);
4811       }
4812       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
4813         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
4814           return CGF.Builder.CreateIsNotNull(
4815               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4816         });
4817       }
4818       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4819           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4820           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
4821         EmitOMPReductionClauseFinal(S, OMPD_simd);
4822         // Emit post-update of the reduction variables if IsLastIter != 0.
4823         emitPostUpdateForReductionClause(
4824             *this, S, [IL, &S](CodeGenFunction &CGF) {
4825               return CGF.Builder.CreateIsNotNull(
4826                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4827             });
4828       }
4829       // Emit final copy of the lastprivate variables if IsLastIter != 0.
4830       if (HasLastprivateClause) {
4831         EmitOMPLastprivateClauseFinal(
4832             S, /*NoFinals=*/false,
4833             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
4834       }
4835     }
4836 
4837     // We're now done with the loop, so jump to the continuation block.
4838     if (ContBlock) {
4839       EmitBranch(ContBlock);
4840       EmitBlock(ContBlock, true);
4841     }
4842   }
4843 }
4844 
4845 void CodeGenFunction::EmitOMPDistributeDirective(
4846     const OMPDistributeDirective &S) {
4847   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4848     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4849   };
4850   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4851   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
4852 }
4853 
4854 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
4855                                                    const CapturedStmt *S,
4856                                                    SourceLocation Loc) {
4857   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
4858   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
4859   CGF.CapturedStmtInfo = &CapStmtInfo;
4860   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
4861   Fn->setDoesNotRecurse();
4862   return Fn;
4863 }
4864 
4865 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
4866   if (S.hasClausesOfKind<OMPDependClause>()) {
4867     assert(!S.hasAssociatedStmt() &&
4868            "No associated statement must be in ordered depend construct.");
4869     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
4870       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
4871     return;
4872   }
4873   const auto *C = S.getSingleClause<OMPSIMDClause>();
4874   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
4875                                  PrePostActionTy &Action) {
4876     const CapturedStmt *CS = S.getInnermostCapturedStmt();
4877     if (C) {
4878       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4879       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4880       llvm::Function *OutlinedFn =
4881           emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
4882       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
4883                                                       OutlinedFn, CapturedVars);
4884     } else {
4885       Action.Enter(CGF);
4886       CGF.EmitStmt(CS->getCapturedStmt());
4887     }
4888   };
4889   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4890   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
4891 }
4892 
4893 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
4894                                          QualType SrcType, QualType DestType,
4895                                          SourceLocation Loc) {
4896   assert(CGF.hasScalarEvaluationKind(DestType) &&
4897          "DestType must have scalar evaluation kind.");
4898   assert(!Val.isAggregate() && "Must be a scalar or complex.");
4899   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
4900                                                    DestType, Loc)
4901                         : CGF.EmitComplexToScalarConversion(
4902                               Val.getComplexVal(), SrcType, DestType, Loc);
4903 }
4904 
4905 static CodeGenFunction::ComplexPairTy
4906 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
4907                       QualType DestType, SourceLocation Loc) {
4908   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
4909          "DestType must have complex evaluation kind.");
4910   CodeGenFunction::ComplexPairTy ComplexVal;
4911   if (Val.isScalar()) {
4912     // Convert the input element to the element type of the complex.
4913     QualType DestElementType =
4914         DestType->castAs<ComplexType>()->getElementType();
4915     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
4916         Val.getScalarVal(), SrcType, DestElementType, Loc);
4917     ComplexVal = CodeGenFunction::ComplexPairTy(
4918         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
4919   } else {
4920     assert(Val.isComplex() && "Must be a scalar or complex.");
4921     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
4922     QualType DestElementType =
4923         DestType->castAs<ComplexType>()->getElementType();
4924     ComplexVal.first = CGF.EmitScalarConversion(
4925         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
4926     ComplexVal.second = CGF.EmitScalarConversion(
4927         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
4928   }
4929   return ComplexVal;
4930 }
4931 
4932 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4933                                   LValue LVal, RValue RVal) {
4934   if (LVal.isGlobalReg())
4935     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
4936   else
4937     CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
4938 }
4939 
4940 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
4941                                    llvm::AtomicOrdering AO, LValue LVal,
4942                                    SourceLocation Loc) {
4943   if (LVal.isGlobalReg())
4944     return CGF.EmitLoadOfLValue(LVal, Loc);
4945   return CGF.EmitAtomicLoad(
4946       LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
4947       LVal.isVolatile());
4948 }
4949 
4950 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
4951                                          QualType RValTy, SourceLocation Loc) {
4952   switch (getEvaluationKind(LVal.getType())) {
4953   case TEK_Scalar:
4954     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
4955                                *this, RVal, RValTy, LVal.getType(), Loc)),
4956                            LVal);
4957     break;
4958   case TEK_Complex:
4959     EmitStoreOfComplex(
4960         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
4961         /*isInit=*/false);
4962     break;
4963   case TEK_Aggregate:
4964     llvm_unreachable("Must be a scalar or complex.");
4965   }
4966 }
4967 
4968 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4969                                   const Expr *X, const Expr *V,
4970                                   SourceLocation Loc) {
4971   // v = x;
4972   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
4973   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
4974   LValue XLValue = CGF.EmitLValue(X);
4975   LValue VLValue = CGF.EmitLValue(V);
4976   RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
4977   // OpenMP, 2.17.7, atomic Construct
4978   // If the read or capture clause is specified and the acquire, acq_rel, or
4979   // seq_cst clause is specified then the strong flush on exit from the atomic
4980   // operation is also an acquire flush.
4981   switch (AO) {
4982   case llvm::AtomicOrdering::Acquire:
4983   case llvm::AtomicOrdering::AcquireRelease:
4984   case llvm::AtomicOrdering::SequentiallyConsistent:
4985     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4986                                          llvm::AtomicOrdering::Acquire);
4987     break;
4988   case llvm::AtomicOrdering::Monotonic:
4989   case llvm::AtomicOrdering::Release:
4990     break;
4991   case llvm::AtomicOrdering::NotAtomic:
4992   case llvm::AtomicOrdering::Unordered:
4993     llvm_unreachable("Unexpected ordering.");
4994   }
4995   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
4996   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
4997 }
4998 
4999 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
5000                                    llvm::AtomicOrdering AO, const Expr *X,
5001                                    const Expr *E, SourceLocation Loc) {
5002   // x = expr;
5003   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
5004   emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
5005   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5006   // OpenMP, 2.17.7, atomic Construct
5007   // If the write, update, or capture clause is specified and the release,
5008   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5009   // the atomic operation is also a release flush.
5010   switch (AO) {
5011   case llvm::AtomicOrdering::Release:
5012   case llvm::AtomicOrdering::AcquireRelease:
5013   case llvm::AtomicOrdering::SequentiallyConsistent:
5014     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5015                                          llvm::AtomicOrdering::Release);
5016     break;
5017   case llvm::AtomicOrdering::Acquire:
5018   case llvm::AtomicOrdering::Monotonic:
5019     break;
5020   case llvm::AtomicOrdering::NotAtomic:
5021   case llvm::AtomicOrdering::Unordered:
5022     llvm_unreachable("Unexpected ordering.");
5023   }
5024 }
5025 
/// Try to emit an OpenMP atomic update of 'x' as a single 'atomicrmw'
/// instruction.
///
/// \param X lvalue of 'x'.
/// \param Update value combined with 'x' (or exchanged in for BO_Assign).
/// \param BO operator of the update expression.
/// \param AO ordering given to the emitted instruction.
/// \param IsXLHSInRHSPart consulted for BO_Sub, BO_LT and BO_GT, where the
///        operand order matters. NOTE(review): by its name, true when 'x' is
///        the left operand -- confirm against the callers.
/// \returns {true, result of the 'atomicrmw'} when the instruction was
///          emitted, otherwise {false, RValue::get(nullptr)}.
5026 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
5027                                                 RValue Update,
5028                                                 BinaryOperatorKind BO,
5029                                                 llvm::AtomicOrdering AO,
5030                                                 bool IsXLHSInRHSPart) {
5031   ASTContext &Context = CGF.getContext();
5032   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
5033   // expression is simple and atomic is allowed for the given type for the
5034   // target platform.
  // The checks short-circuit in this order; a non-constant update must also
  // have exactly the element type of x's address, while a ConstantInt update
  // is exempt from that comparison (it is cast further down).
5035   if (BO == BO_Comma || !Update.isScalar() ||
5036       !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
5037       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
5038        (Update.getScalarVal()->getType() !=
5039         X.getAddress(CGF).getElementType())) ||
5040       !X.getAddress(CGF).getElementType()->isIntegerTy() ||
5041       !Context.getTargetInfo().hasBuiltinAtomic(
5042           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
5043     return std::make_pair(false, RValue::get(nullptr));
5044 
  // Map the source-level operator onto an atomicrmw operation.
5045   llvm::AtomicRMWInst::BinOp RMWOp;
5046   switch (BO) {
5047   case BO_Add:
5048     RMWOp = llvm::AtomicRMWInst::Add;
5049     break;
5050   case BO_Sub:
    // Subtraction is only lowered when IsXLHSInRHSPart is set; otherwise
    // report failure so the caller can fall back.
5051     if (!IsXLHSInRHSPart)
5052       return std::make_pair(false, RValue::get(nullptr));
5053     RMWOp = llvm::AtomicRMWInst::Sub;
5054     break;
5055   case BO_And:
5056     RMWOp = llvm::AtomicRMWInst::And;
5057     break;
5058   case BO_Or:
5059     RMWOp = llvm::AtomicRMWInst::Or;
5060     break;
5061   case BO_Xor:
5062     RMWOp = llvm::AtomicRMWInst::Xor;
5063     break;
  // BO_LT and BO_GT select a min/max operation: the signed or unsigned
  // variant follows x's type, and IsXLHSInRHSPart picks between min and max.
5064   case BO_LT:
5065     RMWOp = X.getType()->hasSignedIntegerRepresentation()
5066                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
5067                                    : llvm::AtomicRMWInst::Max)
5068                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
5069                                    : llvm::AtomicRMWInst::UMax);
5070     break;
5071   case BO_GT:
5072     RMWOp = X.getType()->hasSignedIntegerRepresentation()
5073                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
5074                                    : llvm::AtomicRMWInst::Min)
5075                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
5076                                    : llvm::AtomicRMWInst::UMin);
5077     break;
5078   case BO_Assign:
5079     RMWOp = llvm::AtomicRMWInst::Xchg;
5080     break;
  // Valid update operators with no atomicrmw mapping here: report failure.
5081   case BO_Mul:
5082   case BO_Div:
5083   case BO_Rem:
5084   case BO_Shl:
5085   case BO_Shr:
5086   case BO_LAnd:
5087   case BO_LOr:
5088     return std::make_pair(false, RValue::get(nullptr));
  // Operators that are not expected as an atomic update operation at all.
5089   case BO_PtrMemD:
5090   case BO_PtrMemI:
5091   case BO_LE:
5092   case BO_GE:
5093   case BO_EQ:
5094   case BO_NE:
5095   case BO_Cmp:
5096   case BO_AddAssign:
5097   case BO_SubAssign:
5098   case BO_AndAssign:
5099   case BO_OrAssign:
5100   case BO_XorAssign:
5101   case BO_MulAssign:
5102   case BO_DivAssign:
5103   case BO_RemAssign:
5104   case BO_ShlAssign:
5105   case BO_ShrAssign:
5106   case BO_Comma:
5107     llvm_unreachable("Unsupported atomic update operation");
5108   }
5109   llvm::Value *UpdateVal = Update.getScalarVal();
  // A constant update was exempt from the type-equality check above, so cast
  // it to the element type of x's address using x's signedness.
5110   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5111     UpdateVal = CGF.Builder.CreateIntCast(
5112         IC, X.getAddress(CGF).getElementType(),
5113         X.getType()->hasSignedIntegerRepresentation());
5114   }
5115   llvm::Value *Res =
5116       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5117   return std::make_pair(true, RValue::get(Res));
5118 }
5119 
5120 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5121     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5122     llvm::AtomicOrdering AO, SourceLocation Loc,
5123     const llvm::function_ref<RValue(RValue)> CommonGen) {
5124   // Update expressions are allowed to have the following forms:
5125   // x binop= expr; -> xrval + expr;
5126   // x++, ++x -> xrval + 1;
5127   // x--, --x -> xrval - 1;
5128   // x = x binop expr; -> xrval binop expr
5129   // x = expr Op x; - > expr binop xrval;
5130   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5131   if (!Res.first) {
5132     if (X.isGlobalReg()) {
5133       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5134       // 'xrval'.
5135       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5136     } else {
5137       // Perform compare-and-swap procedure.
5138       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5139     }
5140   }
5141   return Res;
5142 }
5143 
5144 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5145                                     llvm::AtomicOrdering AO, const Expr *X,
5146                                     const Expr *E, const Expr *UE,
5147                                     bool IsXLHSInRHSPart, SourceLocation Loc) {
5148   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5149          "Update expr in 'atomic update' must be a binary operator.");
5150   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5151   // Update expressions are allowed to have the following forms:
5152   // x binop= expr; -> xrval + expr;
5153   // x++, ++x -> xrval + 1;
5154   // x--, --x -> xrval - 1;
5155   // x = x binop expr; -> xrval binop expr
5156   // x = expr Op x; - > expr binop xrval;
5157   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5158   LValue XLValue = CGF.EmitLValue(X);
5159   RValue ExprRValue = CGF.EmitAnyExpr(E);
5160   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5161   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5162   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5163   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5164   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5165     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5166     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5167     return CGF.EmitAnyExpr(UE);
5168   };
5169   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5170       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5171   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5172   // OpenMP, 2.17.7, atomic Construct
5173   // If the write, update, or capture clause is specified and the release,
5174   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5175   // the atomic operation is also a release flush.
5176   switch (AO) {
5177   case llvm::AtomicOrdering::Release:
5178   case llvm::AtomicOrdering::AcquireRelease:
5179   case llvm::AtomicOrdering::SequentiallyConsistent:
5180     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5181                                          llvm::AtomicOrdering::Release);
5182     break;
5183   case llvm::AtomicOrdering::Acquire:
5184   case llvm::AtomicOrdering::Monotonic:
5185     break;
5186   case llvm::AtomicOrdering::NotAtomic:
5187   case llvm::AtomicOrdering::Unordered:
5188     llvm_unreachable("Unexpected ordering.");
5189   }
5190 }
5191 
5192 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5193                             QualType SourceType, QualType ResType,
5194                             SourceLocation Loc) {
5195   switch (CGF.getEvaluationKind(ResType)) {
5196   case TEK_Scalar:
5197     return RValue::get(
5198         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5199   case TEK_Complex: {
5200     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5201     return RValue::getComplex(Res.first, Res.second);
5202   }
5203   case TEK_Aggregate:
5204     break;
5205   }
5206   llvm_unreachable("Must be a scalar or complex.");
5207 }
5208 
/// Emit 'omp atomic capture': atomically update 'x' and store either its
/// previous or its updated value into 'v', then emit the flush selected by
/// the ordering.
///
/// \param AO ordering for the atomic operation; also selects the flush
///        emitted at the end.
/// \param IsPostfixUpdate if true, 'v' receives the value 'x' held before
///        the update, otherwise the value after it.
/// \param V lvalue expression that receives the captured value.
/// \param X lvalue expression that is updated.
/// \param E the 'expr' operand.
/// \param UE update expression built over opaque placeholders for 'x' and
///        'expr'; null when 'x' is simply overwritten with \p E.
/// \param IsXLHSInRHSPart selects which operand of \p UE stands for 'x'
///        (the left one when true).
/// \param Loc location used for the emitted conversions, stores and flushes.
5209 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5210                                      llvm::AtomicOrdering AO,
5211                                      bool IsPostfixUpdate, const Expr *V,
5212                                      const Expr *X, const Expr *E,
5213                                      const Expr *UE, bool IsXLHSInRHSPart,
5214                                      SourceLocation Loc) {
5215   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
5216   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that will be stored into 'v'; filled in either by the update
  // callback or, when an 'atomicrmw' was emitted, right after it.
5217   RValue NewVVal;
5218   LValue VLValue = CGF.EmitLValue(V);
5219   LValue XLValue = CGF.EmitLValue(X);
5220   RValue ExprRValue = CGF.EmitAnyExpr(E);
  // Type of NewVVal, needed to convert it to v's type on the final store.
5221   QualType NewVValType;
5222   if (UE) {
5223     // 'x' is updated with some additional value.
5224     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5225            "Update expr in 'atomic capture' must be a binary operator.");
5226     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5227     // Update expressions are allowed to have the following forms:
5228     // x binop= expr; -> xrval + expr;
5229     // x++, ++x -> xrval + 1;
5230     // x--, --x -> xrval - 1;
5231     // x = x binop expr; -> xrval binop expr
5232     // x = expr Op x; - > expr binop xrval;
5233     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5234     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5235     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5236     NewVValType = XRValExpr->getType();
5237     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Update callback: evaluates UE with the placeholders bound to the given
    // 'x' value and to 'expr', and records the value to capture (old 'x' for
    // a postfix update, the new value otherwise) as a side effect.
5238     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
5239                   IsPostfixUpdate](RValue XRValue) {
5240       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5241       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5242       RValue Res = CGF.EmitAnyExpr(UE);
5243       NewVVal = IsPostfixUpdate ? XRValue : Res;
5244       return Res;
5245     };
5246     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5247         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5248     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5249     if (Res.first) {
5250       // 'atomicrmw' instruction was generated.
5251       if (IsPostfixUpdate) {
5252         // Use old value from 'atomicrmw'.
5253         NewVVal = Res.second;
5254       } else {
5255         // 'atomicrmw' does not provide new value, so evaluate it using old
5256         // value of 'x'.
5257         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5258         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
5259         NewVVal = CGF.EmitAnyExpr(UE);
5260       }
5261     }
5262   } else {
5263     // 'x' is simply rewritten with some 'expr'.
5264     NewVValType = X->getType().getNonReferenceType();
5265     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
5266                                X->getType().getNonReferenceType(), Loc);
    // NOTE(review): this callback always captures the old value of 'x',
    // while the 'atomicrmw' path below honours IsPostfixUpdate. Presumably
    // this form is only reached with IsPostfixUpdate set -- confirm.
5267     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
5268       NewVVal = XRValue;
5269       return ExprRValue;
5270     };
5271     // Try to perform atomicrmw xchg, otherwise simple exchange.
5272     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5273         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
5274         Loc, Gen);
5275     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5276     if (Res.first) {
5277       // 'atomicrmw' instruction was generated.
5278       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
5279     }
5280   }
5281   // Emit post-update store to 'v' of old/new 'x' value.
5282   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
5283   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5284   // OpenMP, 2.17.7, atomic Construct
5285   // If the write, update, or capture clause is specified and the release,
5286   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5287   // the atomic operation is also a release flush.
5288   // If the read or capture clause is specified and the acquire, acq_rel, or
5289   // seq_cst clause is specified then the strong flush on exit from the atomic
5290   // operation is also an acquire flush.
  // Capture both reads and writes 'x', so acq_rel and seq_cst get a single
  // acquire-release flush; release and acquire get the matching flush only.
5291   switch (AO) {
5292   case llvm::AtomicOrdering::Release:
5293     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5294                                          llvm::AtomicOrdering::Release);
5295     break;
5296   case llvm::AtomicOrdering::Acquire:
5297     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5298                                          llvm::AtomicOrdering::Acquire);
5299     break;
5300   case llvm::AtomicOrdering::AcquireRelease:
5301   case llvm::AtomicOrdering::SequentiallyConsistent:
5302     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5303                                          llvm::AtomicOrdering::AcquireRelease);
5304     break;
5305   case llvm::AtomicOrdering::Monotonic:
5306     break;
5307   case llvm::AtomicOrdering::NotAtomic:
5308   case llvm::AtomicOrdering::Unordered:
5309     llvm_unreachable("Unexpected ordering.");
5310   }
5311 }
5312 
5313 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
5314                               llvm::AtomicOrdering AO, bool IsPostfixUpdate,
5315                               const Expr *X, const Expr *V, const Expr *E,
5316                               const Expr *UE, bool IsXLHSInRHSPart,
5317                               SourceLocation Loc) {
5318   switch (Kind) {
5319   case OMPC_read:
5320     emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
5321     break;
5322   case OMPC_write:
5323     emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
5324     break;
5325   case OMPC_unknown:
5326   case OMPC_update:
5327     emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
5328     break;
5329   case OMPC_capture:
5330     emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
5331                              IsXLHSInRHSPart, Loc);
5332     break;
5333   case OMPC_if:
5334   case OMPC_final:
5335   case OMPC_num_threads:
5336   case OMPC_private:
5337   case OMPC_firstprivate:
5338   case OMPC_lastprivate:
5339   case OMPC_reduction:
5340   case OMPC_task_reduction:
5341   case OMPC_in_reduction:
5342   case OMPC_safelen:
5343   case OMPC_simdlen:
5344   case OMPC_allocator:
5345   case OMPC_allocate:
5346   case OMPC_collapse:
5347   case OMPC_default:
5348   case OMPC_seq_cst:
5349   case OMPC_acq_rel:
5350   case OMPC_acquire:
5351   case OMPC_release:
5352   case OMPC_relaxed:
5353   case OMPC_shared:
5354   case OMPC_linear:
5355   case OMPC_aligned:
5356   case OMPC_copyin:
5357   case OMPC_copyprivate:
5358   case OMPC_flush:
5359   case OMPC_depobj:
5360   case OMPC_proc_bind:
5361   case OMPC_schedule:
5362   case OMPC_ordered:
5363   case OMPC_nowait:
5364   case OMPC_untied:
5365   case OMPC_threadprivate:
5366   case OMPC_depend:
5367   case OMPC_mergeable:
5368   case OMPC_device:
5369   case OMPC_threads:
5370   case OMPC_simd:
5371   case OMPC_map:
5372   case OMPC_num_teams:
5373   case OMPC_thread_limit:
5374   case OMPC_priority:
5375   case OMPC_grainsize:
5376   case OMPC_nogroup:
5377   case OMPC_num_tasks:
5378   case OMPC_hint:
5379   case OMPC_dist_schedule:
5380   case OMPC_defaultmap:
5381   case OMPC_uniform:
5382   case OMPC_to:
5383   case OMPC_from:
5384   case OMPC_use_device_ptr:
5385   case OMPC_use_device_addr:
5386   case OMPC_is_device_ptr:
5387   case OMPC_unified_address:
5388   case OMPC_unified_shared_memory:
5389   case OMPC_reverse_offload:
5390   case OMPC_dynamic_allocators:
5391   case OMPC_atomic_default_mem_order:
5392   case OMPC_device_type:
5393   case OMPC_match:
5394   case OMPC_nontemporal:
5395   case OMPC_order:
5396   case OMPC_destroy:
5397   case OMPC_detach:
5398   case OMPC_inclusive:
5399   case OMPC_exclusive:
5400   case OMPC_uses_allocators:
5401   case OMPC_affinity:
5402   default:
5403     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
5404   }
5405 }
5406 
5407 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
5408   llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
5409   bool MemOrderingSpecified = false;
5410   if (S.getSingleClause<OMPSeqCstClause>()) {
5411     AO = llvm::AtomicOrdering::SequentiallyConsistent;
5412     MemOrderingSpecified = true;
5413   } else if (S.getSingleClause<OMPAcqRelClause>()) {
5414     AO = llvm::AtomicOrdering::AcquireRelease;
5415     MemOrderingSpecified = true;
5416   } else if (S.getSingleClause<OMPAcquireClause>()) {
5417     AO = llvm::AtomicOrdering::Acquire;
5418     MemOrderingSpecified = true;
5419   } else if (S.getSingleClause<OMPReleaseClause>()) {
5420     AO = llvm::AtomicOrdering::Release;
5421     MemOrderingSpecified = true;
5422   } else if (S.getSingleClause<OMPRelaxedClause>()) {
5423     AO = llvm::AtomicOrdering::Monotonic;
5424     MemOrderingSpecified = true;
5425   }
5426   OpenMPClauseKind Kind = OMPC_unknown;
5427   for (const OMPClause *C : S.clauses()) {
5428     // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
5429     // if it is first).
5430     if (C->getClauseKind() != OMPC_seq_cst &&
5431         C->getClauseKind() != OMPC_acq_rel &&
5432         C->getClauseKind() != OMPC_acquire &&
5433         C->getClauseKind() != OMPC_release &&
5434         C->getClauseKind() != OMPC_relaxed) {
5435       Kind = C->getClauseKind();
5436       break;
5437     }
5438   }
5439   if (!MemOrderingSpecified) {
5440     llvm::AtomicOrdering DefaultOrder =
5441         CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
5442     if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
5443         DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
5444         (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
5445          Kind == OMPC_capture)) {
5446       AO = DefaultOrder;
5447     } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
5448       if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
5449         AO = llvm::AtomicOrdering::Release;
5450       } else if (Kind == OMPC_read) {
5451         assert(Kind == OMPC_read && "Unexpected atomic kind.");
5452         AO = llvm::AtomicOrdering::Acquire;
5453       }
5454     }
5455   }
5456 
5457   LexicalScope Scope(*this, S.getSourceRange());
5458   EmitStopPoint(S.getAssociatedStmt());
5459   emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
5460                     S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
5461                     S.getBeginLoc());
5462 }
5463 
5464 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
5465                                          const OMPExecutableDirective &S,
5466                                          const RegionCodeGenTy &CodeGen) {
5467   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
5468   CodeGenModule &CGM = CGF.CGM;
5469 
5470   // On device emit this construct as inlined code.
5471   if (CGM.getLangOpts().OpenMPIsDevice) {
5472     OMPLexicalScope Scope(CGF, S, OMPD_target);
5473     CGM.getOpenMPRuntime().emitInlinedDirective(
5474         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5475           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5476         });
5477     return;
5478   }
5479 
5480   auto LPCRegion =
5481       CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
5482   llvm::Function *Fn = nullptr;
5483   llvm::Constant *FnID = nullptr;
5484 
5485   const Expr *IfCond = nullptr;
5486   // Check for the at most one if clause associated with the target region.
5487   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5488     if (C->getNameModifier() == OMPD_unknown ||
5489         C->getNameModifier() == OMPD_target) {
5490       IfCond = C->getCondition();
5491       break;
5492     }
5493   }
5494 
5495   // Check if we have any device clause associated with the directive.
5496   llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
5497       nullptr, OMPC_DEVICE_unknown);
5498   if (auto *C = S.getSingleClause<OMPDeviceClause>())
5499     Device.setPointerAndInt(C->getDevice(), C->getModifier());
5500 
5501   // Check if we have an if clause whose conditional always evaluates to false
5502   // or if we do not have any targets specified. If so the target region is not
5503   // an offload entry point.
5504   bool IsOffloadEntry = true;
5505   if (IfCond) {
5506     bool Val;
5507     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
5508       IsOffloadEntry = false;
5509   }
5510   if (CGM.getLangOpts().OMPTargetTriples.empty())
5511     IsOffloadEntry = false;
5512 
5513   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
5514   StringRef ParentName;
5515   // In case we have Ctors/Dtors we use the complete type variant to produce
5516   // the mangling of the device outlined kernel.
5517   if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
5518     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
5519   else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
5520     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
5521   else
5522     ParentName =
5523         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
5524 
5525   // Emit target region as a standalone region.
5526   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
5527                                                     IsOffloadEntry, CodeGen);
5528   OMPLexicalScope Scope(CGF, S, OMPD_task);
5529   auto &&SizeEmitter =
5530       [IsOffloadEntry](CodeGenFunction &CGF,
5531                        const OMPLoopDirective &D) -> llvm::Value * {
5532     if (IsOffloadEntry) {
5533       OMPLoopScope(CGF, D);
5534       // Emit calculation of the iterations count.
5535       llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
5536       NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
5537                                                 /*isSigned=*/false);
5538       return NumIterations;
5539     }
5540     return nullptr;
5541   };
5542   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
5543                                         SizeEmitter);
5544 }
5545 
5546 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
5547                              PrePostActionTy &Action) {
5548   Action.Enter(CGF);
5549   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5550   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5551   CGF.EmitOMPPrivateClause(S, PrivateScope);
5552   (void)PrivateScope.Privatize();
5553   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5554     CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5555 
5556   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
5557 }
5558 
5559 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
5560                                                   StringRef ParentName,
5561                                                   const OMPTargetDirective &S) {
5562   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5563     emitTargetRegion(CGF, S, Action);
5564   };
5565   llvm::Function *Fn;
5566   llvm::Constant *Addr;
5567   // Emit target region as a standalone region.
5568   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5569       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5570   assert(Fn && Addr && "Target device function emission failed.");
5571 }
5572 
5573 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
5574   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5575     emitTargetRegion(CGF, S, Action);
5576   };
5577   emitCommonOMPTargetDirective(*this, S, CodeGen);
5578 }
5579 
5580 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
5581                                         const OMPExecutableDirective &S,
5582                                         OpenMPDirectiveKind InnermostKind,
5583                                         const RegionCodeGenTy &CodeGen) {
5584   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
5585   llvm::Function *OutlinedFn =
5586       CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
5587           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
5588 
5589   const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
5590   const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5591   if (NT || TL) {
5592     const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
5593     const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
5594 
5595     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
5596                                                   S.getBeginLoc());
5597   }
5598 
5599   OMPTeamsScope Scope(CGF, S);
5600   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5601   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5602   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
5603                                            CapturedVars);
5604 }
5605 
5606 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
5607   // Emit teams region as a standalone region.
5608   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5609     Action.Enter(CGF);
5610     OMPPrivateScope PrivateScope(CGF);
5611     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5612     CGF.EmitOMPPrivateClause(S, PrivateScope);
5613     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5614     (void)PrivateScope.Privatize();
5615     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
5616     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5617   };
5618   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5619   emitPostUpdateForReductionClause(*this, S,
5620                                    [](CodeGenFunction &) { return nullptr; });
5621 }
5622 
5623 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5624                                   const OMPTargetTeamsDirective &S) {
5625   auto *CS = S.getCapturedStmt(OMPD_teams);
5626   Action.Enter(CGF);
5627   // Emit teams region as a standalone region.
5628   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
5629     Action.Enter(CGF);
5630     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5631     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5632     CGF.EmitOMPPrivateClause(S, PrivateScope);
5633     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5634     (void)PrivateScope.Privatize();
5635     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5636       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5637     CGF.EmitStmt(CS->getCapturedStmt());
5638     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5639   };
5640   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
5641   emitPostUpdateForReductionClause(CGF, S,
5642                                    [](CodeGenFunction &) { return nullptr; });
5643 }
5644 
5645 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
5646     CodeGenModule &CGM, StringRef ParentName,
5647     const OMPTargetTeamsDirective &S) {
5648   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5649     emitTargetTeamsRegion(CGF, Action, S);
5650   };
5651   llvm::Function *Fn;
5652   llvm::Constant *Addr;
5653   // Emit target region as a standalone region.
5654   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5655       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5656   assert(Fn && Addr && "Target device function emission failed.");
5657 }
5658 
5659 void CodeGenFunction::EmitOMPTargetTeamsDirective(
5660     const OMPTargetTeamsDirective &S) {
5661   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5662     emitTargetTeamsRegion(CGF, Action, S);
5663   };
5664   emitCommonOMPTargetDirective(*this, S, CodeGen);
5665 }
5666 
5667 static void
5668 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5669                                 const OMPTargetTeamsDistributeDirective &S) {
5670   Action.Enter(CGF);
5671   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5672     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5673   };
5674 
5675   // Emit teams region as a standalone region.
5676   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5677                                             PrePostActionTy &Action) {
5678     Action.Enter(CGF);
5679     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5680     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5681     (void)PrivateScope.Privatize();
5682     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5683                                                     CodeGenDistribute);
5684     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5685   };
5686   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
5687   emitPostUpdateForReductionClause(CGF, S,
5688                                    [](CodeGenFunction &) { return nullptr; });
5689 }
5690 
5691 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
5692     CodeGenModule &CGM, StringRef ParentName,
5693     const OMPTargetTeamsDistributeDirective &S) {
5694   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5695     emitTargetTeamsDistributeRegion(CGF, Action, S);
5696   };
5697   llvm::Function *Fn;
5698   llvm::Constant *Addr;
5699   // Emit target region as a standalone region.
5700   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5701       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5702   assert(Fn && Addr && "Target device function emission failed.");
5703 }
5704 
5705 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
5706     const OMPTargetTeamsDistributeDirective &S) {
5707   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5708     emitTargetTeamsDistributeRegion(CGF, Action, S);
5709   };
5710   emitCommonOMPTargetDirective(*this, S, CodeGen);
5711 }
5712 
5713 static void emitTargetTeamsDistributeSimdRegion(
5714     CodeGenFunction &CGF, PrePostActionTy &Action,
5715     const OMPTargetTeamsDistributeSimdDirective &S) {
5716   Action.Enter(CGF);
5717   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5718     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5719   };
5720 
5721   // Emit teams region as a standalone region.
5722   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5723                                             PrePostActionTy &Action) {
5724     Action.Enter(CGF);
5725     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5726     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5727     (void)PrivateScope.Privatize();
5728     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5729                                                     CodeGenDistribute);
5730     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5731   };
5732   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
5733   emitPostUpdateForReductionClause(CGF, S,
5734                                    [](CodeGenFunction &) { return nullptr; });
5735 }
5736 
5737 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
5738     CodeGenModule &CGM, StringRef ParentName,
5739     const OMPTargetTeamsDistributeSimdDirective &S) {
5740   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5741     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5742   };
5743   llvm::Function *Fn;
5744   llvm::Constant *Addr;
5745   // Emit target region as a standalone region.
5746   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5747       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5748   assert(Fn && Addr && "Target device function emission failed.");
5749 }
5750 
5751 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
5752     const OMPTargetTeamsDistributeSimdDirective &S) {
5753   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5754     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5755   };
5756   emitCommonOMPTargetDirective(*this, S, CodeGen);
5757 }
5758 
5759 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
5760     const OMPTeamsDistributeDirective &S) {
5761 
5762   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5763     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5764   };
5765 
5766   // Emit teams region as a standalone region.
5767   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5768                                             PrePostActionTy &Action) {
5769     Action.Enter(CGF);
5770     OMPPrivateScope PrivateScope(CGF);
5771     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5772     (void)PrivateScope.Privatize();
5773     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5774                                                     CodeGenDistribute);
5775     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5776   };
5777   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5778   emitPostUpdateForReductionClause(*this, S,
5779                                    [](CodeGenFunction &) { return nullptr; });
5780 }
5781 
5782 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
5783     const OMPTeamsDistributeSimdDirective &S) {
5784   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5785     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5786   };
5787 
5788   // Emit teams region as a standalone region.
5789   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5790                                             PrePostActionTy &Action) {
5791     Action.Enter(CGF);
5792     OMPPrivateScope PrivateScope(CGF);
5793     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5794     (void)PrivateScope.Privatize();
5795     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
5796                                                     CodeGenDistribute);
5797     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5798   };
5799   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
5800   emitPostUpdateForReductionClause(*this, S,
5801                                    [](CodeGenFunction &) { return nullptr; });
5802 }
5803 
5804 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
5805     const OMPTeamsDistributeParallelForDirective &S) {
5806   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5807     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5808                               S.getDistInc());
5809   };
5810 
5811   // Emit teams region as a standalone region.
5812   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5813                                             PrePostActionTy &Action) {
5814     Action.Enter(CGF);
5815     OMPPrivateScope PrivateScope(CGF);
5816     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5817     (void)PrivateScope.Privatize();
5818     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5819                                                     CodeGenDistribute);
5820     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5821   };
5822   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
5823   emitPostUpdateForReductionClause(*this, S,
5824                                    [](CodeGenFunction &) { return nullptr; });
5825 }
5826 
5827 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
5828     const OMPTeamsDistributeParallelForSimdDirective &S) {
5829   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5830     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5831                               S.getDistInc());
5832   };
5833 
5834   // Emit teams region as a standalone region.
5835   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5836                                             PrePostActionTy &Action) {
5837     Action.Enter(CGF);
5838     OMPPrivateScope PrivateScope(CGF);
5839     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5840     (void)PrivateScope.Privatize();
5841     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5842         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5843     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5844   };
5845   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
5846                               CodeGen);
5847   emitPostUpdateForReductionClause(*this, S,
5848                                    [](CodeGenFunction &) { return nullptr; });
5849 }
5850 
5851 static void emitTargetTeamsDistributeParallelForRegion(
5852     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
5853     PrePostActionTy &Action) {
5854   Action.Enter(CGF);
5855   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5856     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5857                               S.getDistInc());
5858   };
5859 
5860   // Emit teams region as a standalone region.
5861   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5862                                                  PrePostActionTy &Action) {
5863     Action.Enter(CGF);
5864     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5865     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5866     (void)PrivateScope.Privatize();
5867     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5868         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5869     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5870   };
5871 
5872   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
5873                               CodeGenTeams);
5874   emitPostUpdateForReductionClause(CGF, S,
5875                                    [](CodeGenFunction &) { return nullptr; });
5876 }
5877 
5878 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
5879     CodeGenModule &CGM, StringRef ParentName,
5880     const OMPTargetTeamsDistributeParallelForDirective &S) {
5881   // Emit SPMD target teams distribute parallel for region as a standalone
5882   // region.
5883   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5884     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5885   };
5886   llvm::Function *Fn;
5887   llvm::Constant *Addr;
5888   // Emit target region as a standalone region.
5889   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5890       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5891   assert(Fn && Addr && "Target device function emission failed.");
5892 }
5893 
5894 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
5895     const OMPTargetTeamsDistributeParallelForDirective &S) {
5896   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5897     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5898   };
5899   emitCommonOMPTargetDirective(*this, S, CodeGen);
5900 }
5901 
5902 static void emitTargetTeamsDistributeParallelForSimdRegion(
5903     CodeGenFunction &CGF,
5904     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
5905     PrePostActionTy &Action) {
5906   Action.Enter(CGF);
5907   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5908     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5909                               S.getDistInc());
5910   };
5911 
5912   // Emit teams region as a standalone region.
5913   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5914                                                  PrePostActionTy &Action) {
5915     Action.Enter(CGF);
5916     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5917     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5918     (void)PrivateScope.Privatize();
5919     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5920         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5921     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5922   };
5923 
5924   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
5925                               CodeGenTeams);
5926   emitPostUpdateForReductionClause(CGF, S,
5927                                    [](CodeGenFunction &) { return nullptr; });
5928 }
5929 
5930 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
5931     CodeGenModule &CGM, StringRef ParentName,
5932     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5933   // Emit SPMD target teams distribute parallel for simd region as a standalone
5934   // region.
5935   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5936     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5937   };
5938   llvm::Function *Fn;
5939   llvm::Constant *Addr;
5940   // Emit target region as a standalone region.
5941   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5942       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5943   assert(Fn && Addr && "Target device function emission failed.");
5944 }
5945 
5946 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
5947     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5948   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5949     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5950   };
5951   emitCommonOMPTargetDirective(*this, S, CodeGen);
5952 }
5953 
5954 void CodeGenFunction::EmitOMPCancellationPointDirective(
5955     const OMPCancellationPointDirective &S) {
5956   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
5957                                                    S.getCancelRegion());
5958 }
5959 
5960 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
5961   const Expr *IfCond = nullptr;
5962   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5963     if (C->getNameModifier() == OMPD_unknown ||
5964         C->getNameModifier() == OMPD_cancel) {
5965       IfCond = C->getCondition();
5966       break;
5967     }
5968   }
5969   if (CGM.getLangOpts().OpenMPIRBuilder) {
5970     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5971     // TODO: This check is necessary as we only generate `omp parallel` through
5972     // the OpenMPIRBuilder for now.
5973     if (S.getCancelRegion() == OMPD_parallel) {
5974       llvm::Value *IfCondition = nullptr;
5975       if (IfCond)
5976         IfCondition = EmitScalarExpr(IfCond,
5977                                      /*IgnoreResultAssign=*/true);
5978       return Builder.restoreIP(
5979           OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
5980     }
5981   }
5982 
5983   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
5984                                         S.getCancelRegion());
5985 }
5986 
5987 CodeGenFunction::JumpDest
5988 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
5989   if (Kind == OMPD_parallel || Kind == OMPD_task ||
5990       Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
5991       Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
5992     return ReturnBlock;
5993   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
5994          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
5995          Kind == OMPD_distribute_parallel_for ||
5996          Kind == OMPD_target_parallel_for ||
5997          Kind == OMPD_teams_distribute_parallel_for ||
5998          Kind == OMPD_target_teams_distribute_parallel_for);
5999   return OMPCancelStack.getExitBlock();
6000 }
6001 
6002 void CodeGenFunction::EmitOMPUseDevicePtrClause(
6003     const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
6004     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6005   auto OrigVarIt = C.varlist_begin();
6006   auto InitIt = C.inits().begin();
6007   for (const Expr *PvtVarIt : C.private_copies()) {
6008     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
6009     const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
6010     const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
6011 
6012     // In order to identify the right initializer we need to match the
6013     // declaration used by the mapping logic. In some cases we may get
6014     // OMPCapturedExprDecl that refers to the original declaration.
6015     const ValueDecl *MatchingVD = OrigVD;
6016     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6017       // OMPCapturedExprDecl are used to privative fields of the current
6018       // structure.
6019       const auto *ME = cast<MemberExpr>(OED->getInit());
6020       assert(isa<CXXThisExpr>(ME->getBase()) &&
6021              "Base should be the current struct!");
6022       MatchingVD = ME->getMemberDecl();
6023     }
6024 
6025     // If we don't have information about the current list item, move on to
6026     // the next one.
6027     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6028     if (InitAddrIt == CaptureDeviceAddrMap.end())
6029       continue;
6030 
6031     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
6032                                                          InitAddrIt, InitVD,
6033                                                          PvtVD]() {
6034       // Initialize the temporary initialization variable with the address we
6035       // get from the runtime library. We have to cast the source address
6036       // because it is always a void *. References are materialized in the
6037       // privatization scope, so the initialization here disregards the fact
6038       // the original variable is a reference.
6039       QualType AddrQTy =
6040           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
6041       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
6042       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
6043       setAddrOfLocalVar(InitVD, InitAddr);
6044 
6045       // Emit private declaration, it will be initialized by the value we
6046       // declaration we just added to the local declarations map.
6047       EmitDecl(*PvtVD);
6048 
6049       // The initialization variables reached its purpose in the emission
6050       // of the previous declaration, so we don't need it anymore.
6051       LocalDeclMap.erase(InitVD);
6052 
6053       // Return the address of the private variable.
6054       return GetAddrOfLocalVar(PvtVD);
6055     });
6056     assert(IsRegistered && "firstprivate var already registered as private");
6057     // Silence the warning about unused variable.
6058     (void)IsRegistered;
6059 
6060     ++OrigVarIt;
6061     ++InitIt;
6062   }
6063 }
6064 
6065 static const VarDecl *getBaseDecl(const Expr *Ref) {
6066   const Expr *Base = Ref->IgnoreParenImpCasts();
6067   while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
6068     Base = OASE->getBase()->IgnoreParenImpCasts();
6069   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
6070     Base = ASE->getBase()->IgnoreParenImpCasts();
6071   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
6072 }
6073 
6074 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
6075     const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
6076     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6077   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
6078   for (const Expr *Ref : C.varlists()) {
6079     const VarDecl *OrigVD = getBaseDecl(Ref);
6080     if (!Processed.insert(OrigVD).second)
6081       continue;
6082     // In order to identify the right initializer we need to match the
6083     // declaration used by the mapping logic. In some cases we may get
6084     // OMPCapturedExprDecl that refers to the original declaration.
6085     const ValueDecl *MatchingVD = OrigVD;
6086     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6087       // OMPCapturedExprDecl are used to privative fields of the current
6088       // structure.
6089       const auto *ME = cast<MemberExpr>(OED->getInit());
6090       assert(isa<CXXThisExpr>(ME->getBase()) &&
6091              "Base should be the current struct!");
6092       MatchingVD = ME->getMemberDecl();
6093     }
6094 
6095     // If we don't have information about the current list item, move on to
6096     // the next one.
6097     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6098     if (InitAddrIt == CaptureDeviceAddrMap.end())
6099       continue;
6100 
6101     Address PrivAddr = InitAddrIt->getSecond();
6102     // For declrefs and variable length array need to load the pointer for
6103     // correct mapping, since the pointer to the data was passed to the runtime.
6104     if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6105         MatchingVD->getType()->isArrayType())
6106       PrivAddr =
6107           EmitLoadOfPointer(PrivAddr, getContext()
6108                                           .getPointerType(OrigVD->getType())
6109                                           ->castAs<PointerType>());
6110     llvm::Type *RealTy =
6111         ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
6112             ->getPointerTo();
6113     PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
6114 
6115     (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
6116   }
6117 }
6118 
6119 // Generate the instructions for '#pragma omp target data' directive.
6120 void CodeGenFunction::EmitOMPTargetDataDirective(
6121     const OMPTargetDataDirective &S) {
6122   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
6123                                        /*SeparateBeginEndCalls=*/true);
6124 
6125   // Create a pre/post action to signal the privatization of the device pointer.
6126   // This action can be replaced by the OpenMP runtime code generation to
6127   // deactivate privatization.
6128   bool PrivatizeDevicePointers = false;
6129   class DevicePointerPrivActionTy : public PrePostActionTy {
6130     bool &PrivatizeDevicePointers;
6131 
6132   public:
6133     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
6134         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
6135     void Enter(CodeGenFunction &CGF) override {
6136       PrivatizeDevicePointers = true;
6137     }
6138   };
6139   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
6140 
6141   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
6142                        CodeGenFunction &CGF, PrePostActionTy &Action) {
6143     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6144       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6145     };
6146 
6147     // Codegen that selects whether to generate the privatization code or not.
6148     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
6149                           &InnermostCodeGen](CodeGenFunction &CGF,
6150                                              PrePostActionTy &Action) {
6151       RegionCodeGenTy RCG(InnermostCodeGen);
6152       PrivatizeDevicePointers = false;
6153 
6154       // Call the pre-action to change the status of PrivatizeDevicePointers if
6155       // needed.
6156       Action.Enter(CGF);
6157 
6158       if (PrivatizeDevicePointers) {
6159         OMPPrivateScope PrivateScope(CGF);
6160         // Emit all instances of the use_device_ptr clause.
6161         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
6162           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
6163                                         Info.CaptureDeviceAddrMap);
6164         for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
6165           CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
6166                                          Info.CaptureDeviceAddrMap);
6167         (void)PrivateScope.Privatize();
6168         RCG(CGF);
6169       } else {
6170         OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6171         RCG(CGF);
6172       }
6173     };
6174 
6175     // Forward the provided action to the privatization codegen.
6176     RegionCodeGenTy PrivRCG(PrivCodeGen);
6177     PrivRCG.setAction(Action);
6178 
6179     // Notwithstanding the body of the region is emitted as inlined directive,
6180     // we don't use an inline scope as changes in the references inside the
6181     // region are expected to be visible outside, so we do not privative them.
6182     OMPLexicalScope Scope(CGF, S);
6183     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
6184                                                     PrivRCG);
6185   };
6186 
6187   RegionCodeGenTy RCG(CodeGen);
6188 
6189   // If we don't have target devices, don't bother emitting the data mapping
6190   // code.
6191   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
6192     RCG(*this);
6193     return;
6194   }
6195 
6196   // Check if we have any if clause associated with the directive.
6197   const Expr *IfCond = nullptr;
6198   if (const auto *C = S.getSingleClause<OMPIfClause>())
6199     IfCond = C->getCondition();
6200 
6201   // Check if we have any device clause associated with the directive.
6202   const Expr *Device = nullptr;
6203   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6204     Device = C->getDevice();
6205 
6206   // Set the action to signal privatization of device pointers.
6207   RCG.setAction(PrivAction);
6208 
6209   // Emit region code.
6210   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
6211                                              Info);
6212 }
6213 
6214 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
6215     const OMPTargetEnterDataDirective &S) {
6216   // If we don't have target devices, don't bother emitting the data mapping
6217   // code.
6218   if (CGM.getLangOpts().OMPTargetTriples.empty())
6219     return;
6220 
6221   // Check if we have any if clause associated with the directive.
6222   const Expr *IfCond = nullptr;
6223   if (const auto *C = S.getSingleClause<OMPIfClause>())
6224     IfCond = C->getCondition();
6225 
6226   // Check if we have any device clause associated with the directive.
6227   const Expr *Device = nullptr;
6228   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6229     Device = C->getDevice();
6230 
6231   OMPLexicalScope Scope(*this, S, OMPD_task);
6232   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6233 }
6234 
6235 void CodeGenFunction::EmitOMPTargetExitDataDirective(
6236     const OMPTargetExitDataDirective &S) {
6237   // If we don't have target devices, don't bother emitting the data mapping
6238   // code.
6239   if (CGM.getLangOpts().OMPTargetTriples.empty())
6240     return;
6241 
6242   // Check if we have any if clause associated with the directive.
6243   const Expr *IfCond = nullptr;
6244   if (const auto *C = S.getSingleClause<OMPIfClause>())
6245     IfCond = C->getCondition();
6246 
6247   // Check if we have any device clause associated with the directive.
6248   const Expr *Device = nullptr;
6249   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6250     Device = C->getDevice();
6251 
6252   OMPLexicalScope Scope(*this, S, OMPD_task);
6253   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6254 }
6255 
6256 static void emitTargetParallelRegion(CodeGenFunction &CGF,
6257                                      const OMPTargetParallelDirective &S,
6258                                      PrePostActionTy &Action) {
6259   // Get the captured statement associated with the 'parallel' region.
6260   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
6261   Action.Enter(CGF);
6262   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6263     Action.Enter(CGF);
6264     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6265     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6266     CGF.EmitOMPPrivateClause(S, PrivateScope);
6267     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6268     (void)PrivateScope.Privatize();
6269     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6270       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6271     // TODO: Add support for clauses.
6272     CGF.EmitStmt(CS->getCapturedStmt());
6273     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
6274   };
6275   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
6276                                  emitEmptyBoundParameters);
6277   emitPostUpdateForReductionClause(CGF, S,
6278                                    [](CodeGenFunction &) { return nullptr; });
6279 }
6280 
6281 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
6282     CodeGenModule &CGM, StringRef ParentName,
6283     const OMPTargetParallelDirective &S) {
6284   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6285     emitTargetParallelRegion(CGF, S, Action);
6286   };
6287   llvm::Function *Fn;
6288   llvm::Constant *Addr;
6289   // Emit target region as a standalone region.
6290   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6291       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6292   assert(Fn && Addr && "Target device function emission failed.");
6293 }
6294 
6295 void CodeGenFunction::EmitOMPTargetParallelDirective(
6296     const OMPTargetParallelDirective &S) {
6297   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6298     emitTargetParallelRegion(CGF, S, Action);
6299   };
6300   emitCommonOMPTargetDirective(*this, S, CodeGen);
6301 }
6302 
6303 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
6304                                         const OMPTargetParallelForDirective &S,
6305                                         PrePostActionTy &Action) {
6306   Action.Enter(CGF);
6307   // Emit directive as a combined directive that consists of two implicit
6308   // directives: 'parallel' with 'for' directive.
6309   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6310     Action.Enter(CGF);
6311     CodeGenFunction::OMPCancelStackRAII CancelRegion(
6312         CGF, OMPD_target_parallel_for, S.hasCancel());
6313     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6314                                emitDispatchForLoopBounds);
6315   };
6316   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
6317                                  emitEmptyBoundParameters);
6318 }
6319 
6320 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
6321     CodeGenModule &CGM, StringRef ParentName,
6322     const OMPTargetParallelForDirective &S) {
6323   // Emit SPMD target parallel for region as a standalone region.
6324   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6325     emitTargetParallelForRegion(CGF, S, Action);
6326   };
6327   llvm::Function *Fn;
6328   llvm::Constant *Addr;
6329   // Emit target region as a standalone region.
6330   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6331       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6332   assert(Fn && Addr && "Target device function emission failed.");
6333 }
6334 
6335 void CodeGenFunction::EmitOMPTargetParallelForDirective(
6336     const OMPTargetParallelForDirective &S) {
6337   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6338     emitTargetParallelForRegion(CGF, S, Action);
6339   };
6340   emitCommonOMPTargetDirective(*this, S, CodeGen);
6341 }
6342 
6343 static void
6344 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
6345                                 const OMPTargetParallelForSimdDirective &S,
6346                                 PrePostActionTy &Action) {
6347   Action.Enter(CGF);
6348   // Emit directive as a combined directive that consists of two implicit
6349   // directives: 'parallel' with 'for' directive.
6350   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6351     Action.Enter(CGF);
6352     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6353                                emitDispatchForLoopBounds);
6354   };
6355   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
6356                                  emitEmptyBoundParameters);
6357 }
6358 
6359 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
6360     CodeGenModule &CGM, StringRef ParentName,
6361     const OMPTargetParallelForSimdDirective &S) {
6362   // Emit SPMD target parallel for region as a standalone region.
6363   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6364     emitTargetParallelForSimdRegion(CGF, S, Action);
6365   };
6366   llvm::Function *Fn;
6367   llvm::Constant *Addr;
6368   // Emit target region as a standalone region.
6369   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6370       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6371   assert(Fn && Addr && "Target device function emission failed.");
6372 }
6373 
6374 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
6375     const OMPTargetParallelForSimdDirective &S) {
6376   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6377     emitTargetParallelForSimdRegion(CGF, S, Action);
6378   };
6379   emitCommonOMPTargetDirective(*this, S, CodeGen);
6380 }
6381 
6382 /// Emit a helper variable and return corresponding lvalue.
6383 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
6384                      const ImplicitParamDecl *PVD,
6385                      CodeGenFunction::OMPPrivateScope &Privates) {
6386   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
6387   Privates.addPrivate(VDecl,
6388                       [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
6389 }
6390 
6391 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
6392   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
6393   // Emit outlined function for task construct.
6394   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
6395   Address CapturedStruct = Address::invalid();
6396   {
6397     OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
6398     CapturedStruct = GenerateCapturedStmtArgument(*CS);
6399   }
6400   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
6401   const Expr *IfCond = nullptr;
6402   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6403     if (C->getNameModifier() == OMPD_unknown ||
6404         C->getNameModifier() == OMPD_taskloop) {
6405       IfCond = C->getCondition();
6406       break;
6407     }
6408   }
6409 
6410   OMPTaskDataTy Data;
6411   // Check if taskloop must be emitted without taskgroup.
6412   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
6413   // TODO: Check if we should emit tied or untied task.
6414   Data.Tied = true;
6415   // Set scheduling for taskloop
6416   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
6417     // grainsize clause
6418     Data.Schedule.setInt(/*IntVal=*/false);
6419     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
6420   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
6421     // num_tasks clause
6422     Data.Schedule.setInt(/*IntVal=*/true);
6423     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
6424   }
6425 
6426   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
6427     // if (PreCond) {
6428     //   for (IV in 0..LastIteration) BODY;
6429     //   <Final counter/linear vars updates>;
6430     // }
6431     //
6432 
6433     // Emit: if (PreCond) - begin.
6434     // If the condition constant folds and can be elided, avoid emitting the
6435     // whole loop.
6436     bool CondConstant;
6437     llvm::BasicBlock *ContBlock = nullptr;
6438     OMPLoopScope PreInitScope(CGF, S);
6439     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
6440       if (!CondConstant)
6441         return;
6442     } else {
6443       llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
6444       ContBlock = CGF.createBasicBlock("taskloop.if.end");
6445       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
6446                   CGF.getProfileCount(&S));
6447       CGF.EmitBlock(ThenBlock);
6448       CGF.incrementProfileCounter(&S);
6449     }
6450 
6451     (void)CGF.EmitOMPLinearClauseInit(S);
6452 
6453     OMPPrivateScope LoopScope(CGF);
6454     // Emit helper vars inits.
6455     enum { LowerBound = 5, UpperBound, Stride, LastIter };
6456     auto *I = CS->getCapturedDecl()->param_begin();
6457     auto *LBP = std::next(I, LowerBound);
6458     auto *UBP = std::next(I, UpperBound);
6459     auto *STP = std::next(I, Stride);
6460     auto *LIP = std::next(I, LastIter);
6461     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
6462              LoopScope);
6463     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
6464              LoopScope);
6465     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
6466     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
6467              LoopScope);
6468     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
6469     CGF.EmitOMPLinearClause(S, LoopScope);
6470     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
6471     (void)LoopScope.Privatize();
6472     // Emit the loop iteration variable.
6473     const Expr *IVExpr = S.getIterationVariable();
6474     const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
6475     CGF.EmitVarDecl(*IVDecl);
6476     CGF.EmitIgnoredExpr(S.getInit());
6477 
6478     // Emit the iterations count variable.
6479     // If it is not a variable, Sema decided to calculate iterations count on
6480     // each iteration (e.g., it is foldable into a constant).
6481     if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
6482       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
6483       // Emit calculation of the iterations count.
6484       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
6485     }
6486 
6487     {
6488       OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
6489       emitCommonSimdLoop(
6490           CGF, S,
6491           [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6492             if (isOpenMPSimdDirective(S.getDirectiveKind()))
6493               CGF.EmitOMPSimdInit(S);
6494           },
6495           [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
6496             CGF.EmitOMPInnerLoop(
6497                 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
6498                 [&S](CodeGenFunction &CGF) {
6499                   emitOMPLoopBodyWithStopPoint(CGF, S,
6500                                                CodeGenFunction::JumpDest());
6501                 },
6502                 [](CodeGenFunction &) {});
6503           });
6504     }
6505     // Emit: if (PreCond) - end.
6506     if (ContBlock) {
6507       CGF.EmitBranch(ContBlock);
6508       CGF.EmitBlock(ContBlock, true);
6509     }
6510     // Emit final copy of the lastprivate variables if IsLastIter != 0.
6511     if (HasLastprivateClause) {
6512       CGF.EmitOMPLastprivateClauseFinal(
6513           S, isOpenMPSimdDirective(S.getDirectiveKind()),
6514           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
6515               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
6516               (*LIP)->getType(), S.getBeginLoc())));
6517     }
6518     CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
6519       return CGF.Builder.CreateIsNotNull(
6520           CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
6521                                (*LIP)->getType(), S.getBeginLoc()));
6522     });
6523   };
6524   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
6525                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
6526                             const OMPTaskDataTy &Data) {
6527     auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
6528                       &Data](CodeGenFunction &CGF, PrePostActionTy &) {
6529       OMPLoopScope PreInitScope(CGF, S);
6530       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
6531                                                   OutlinedFn, SharedsTy,
6532                                                   CapturedStruct, IfCond, Data);
6533     };
6534     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
6535                                                     CodeGen);
6536   };
6537   if (Data.Nogroup) {
6538     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
6539   } else {
6540     CGM.getOpenMPRuntime().emitTaskgroupRegion(
6541         *this,
6542         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
6543                                         PrePostActionTy &Action) {
6544           Action.Enter(CGF);
6545           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
6546                                         Data);
6547         },
6548         S.getBeginLoc());
6549   }
6550 }
6551 
6552 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
6553   auto LPCRegion =
6554       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6555   EmitOMPTaskLoopBasedDirective(S);
6556 }
6557 
6558 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
6559     const OMPTaskLoopSimdDirective &S) {
6560   auto LPCRegion =
6561       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6562   OMPLexicalScope Scope(*this, S);
6563   EmitOMPTaskLoopBasedDirective(S);
6564 }
6565 
6566 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
6567     const OMPMasterTaskLoopDirective &S) {
6568   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6569     Action.Enter(CGF);
6570     EmitOMPTaskLoopBasedDirective(S);
6571   };
6572   auto LPCRegion =
6573       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6574   OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
6575   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6576 }
6577 
6578 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
6579     const OMPMasterTaskLoopSimdDirective &S) {
6580   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6581     Action.Enter(CGF);
6582     EmitOMPTaskLoopBasedDirective(S);
6583   };
6584   auto LPCRegion =
6585       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6586   OMPLexicalScope Scope(*this, S);
6587   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6588 }
6589 
6590 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
6591     const OMPParallelMasterTaskLoopDirective &S) {
6592   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6593     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6594                                   PrePostActionTy &Action) {
6595       Action.Enter(CGF);
6596       CGF.EmitOMPTaskLoopBasedDirective(S);
6597     };
6598     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6599     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6600                                             S.getBeginLoc());
6601   };
6602   auto LPCRegion =
6603       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6604   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
6605                                  emitEmptyBoundParameters);
6606 }
6607 
6608 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
6609     const OMPParallelMasterTaskLoopSimdDirective &S) {
6610   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6611     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6612                                   PrePostActionTy &Action) {
6613       Action.Enter(CGF);
6614       CGF.EmitOMPTaskLoopBasedDirective(S);
6615     };
6616     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6617     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6618                                             S.getBeginLoc());
6619   };
6620   auto LPCRegion =
6621       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6622   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
6623                                  emitEmptyBoundParameters);
6624 }
6625 
6626 // Generate the instructions for '#pragma omp target update' directive.
6627 void CodeGenFunction::EmitOMPTargetUpdateDirective(
6628     const OMPTargetUpdateDirective &S) {
6629   // If we don't have target devices, don't bother emitting the data mapping
6630   // code.
6631   if (CGM.getLangOpts().OMPTargetTriples.empty())
6632     return;
6633 
6634   // Check if we have any if clause associated with the directive.
6635   const Expr *IfCond = nullptr;
6636   if (const auto *C = S.getSingleClause<OMPIfClause>())
6637     IfCond = C->getCondition();
6638 
6639   // Check if we have any device clause associated with the directive.
6640   const Expr *Device = nullptr;
6641   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6642     Device = C->getDevice();
6643 
6644   OMPLexicalScope Scope(*this, S, OMPD_task);
6645   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6646 }
6647 
6648 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
6649     const OMPExecutableDirective &D) {
6650   if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
6651     EmitOMPScanDirective(*SD);
6652     return;
6653   }
6654   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
6655     return;
6656   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
6657     OMPPrivateScope GlobalsScope(CGF);
6658     if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
6659       // Capture global firstprivates to avoid crash.
6660       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
6661         for (const Expr *Ref : C->varlists()) {
6662           const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
6663           if (!DRE)
6664             continue;
6665           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
6666           if (!VD || VD->hasLocalStorage())
6667             continue;
6668           if (!CGF.LocalDeclMap.count(VD)) {
6669             LValue GlobLVal = CGF.EmitLValue(Ref);
6670             GlobalsScope.addPrivate(
6671                 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6672           }
6673         }
6674       }
6675     }
6676     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
6677       (void)GlobalsScope.Privatize();
6678       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
6679       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
6680     } else {
6681       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
6682         for (const Expr *E : LD->counters()) {
6683           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
6684           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
6685             LValue GlobLVal = CGF.EmitLValue(E);
6686             GlobalsScope.addPrivate(
6687                 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6688           }
6689           if (isa<OMPCapturedExprDecl>(VD)) {
6690             // Emit only those that were not explicitly referenced in clauses.
6691             if (!CGF.LocalDeclMap.count(VD))
6692               CGF.EmitVarDecl(*VD);
6693           }
6694         }
6695         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
6696           if (!C->getNumForLoops())
6697             continue;
6698           for (unsigned I = LD->getCollapsedNumber(),
6699                         E = C->getLoopNumIterations().size();
6700                I < E; ++I) {
6701             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
6702                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
6703               // Emit only those that were not explicitly referenced in clauses.
6704               if (!CGF.LocalDeclMap.count(VD))
6705                 CGF.EmitVarDecl(*VD);
6706             }
6707           }
6708         }
6709       }
6710       (void)GlobalsScope.Privatize();
6711       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
6712     }
6713   };
6714   if (D.getDirectiveKind() == OMPD_atomic ||
6715       D.getDirectiveKind() == OMPD_critical ||
6716       D.getDirectiveKind() == OMPD_section ||
6717       D.getDirectiveKind() == OMPD_master) {
6718     EmitStmt(D.getAssociatedStmt());
6719   } else {
6720     auto LPCRegion =
6721         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
6722     OMPSimdLexicalScope Scope(*this, D);
6723     CGM.getOpenMPRuntime().emitInlinedDirective(
6724         *this,
6725         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
6726                                                     : D.getDirectiveKind(),
6727         CodeGen);
6728   }
6729   // Check for outer lastprivate conditional update.
6730   checkForLastprivateConditionalUpdate(*this, D);
6731 }
6732