xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp (revision f126890ac5386406dadf7c4cfa9566cbb56537c5)
1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/BinaryFormat/Dwarf.h"
29 #include "llvm/Frontend/OpenMP/OMPConstants.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include <optional>
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm::omp;
41 
// Forward declaration of a helper whose definition lies outside this section.
// Further down it is called on the operands of a use_device_addr clause, and
// the returned declaration is tested for being an OMPCapturedExprDecl.
42 static const VarDecl *getBaseDecl(const Expr *Ref);
43 
44 namespace {
45 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
46 /// for captured expressions.
47 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
48   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
49     for (const auto *C : S.clauses()) {
50       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
51         if (const auto *PreInit =
52                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
53           for (const auto *I : PreInit->decls()) {
54             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
55               CGF.EmitVarDecl(cast<VarDecl>(*I));
56             } else {
57               CodeGenFunction::AutoVarEmission Emission =
58                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
59               CGF.EmitAutoVarCleanups(Emission);
60             }
61           }
62         }
63       }
64     }
65   }
66   CodeGenFunction::OMPPrivateScope InlinedShareds;
67 
68   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
69     return CGF.LambdaCaptureFields.lookup(VD) ||
70            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
71            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
72             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
73   }
74 
75 public:
76   OMPLexicalScope(
77       CodeGenFunction &CGF, const OMPExecutableDirective &S,
78       const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
79       const bool EmitPreInitStmt = true)
80       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
81         InlinedShareds(CGF) {
82     if (EmitPreInitStmt)
83       emitPreInitStmt(CGF, S);
84     if (!CapturedRegion)
85       return;
86     assert(S.hasAssociatedStmt() &&
87            "Expected associated statement for inlined directive.");
88     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
89     for (const auto &C : CS->captures()) {
90       if (C.capturesVariable() || C.capturesVariableByCopy()) {
91         auto *VD = C.getCapturedVar();
92         assert(VD == VD->getCanonicalDecl() &&
93                "Canonical decl must be captured.");
94         DeclRefExpr DRE(
95             CGF.getContext(), const_cast<VarDecl *>(VD),
96             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
97                                        InlinedShareds.isGlobalVarCaptured(VD)),
98             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
99         InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
100       }
101     }
102     (void)InlinedShareds.Privatize();
103   }
104 };
105 
106 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
107 /// for captured expressions.
108 class OMPParallelScope final : public OMPLexicalScope {
109   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
110     OpenMPDirectiveKind Kind = S.getDirectiveKind();
111     return !(isOpenMPTargetExecutionDirective(Kind) ||
112              isOpenMPLoopBoundSharingDirective(Kind)) &&
113            isOpenMPParallelDirective(Kind);
114   }
115 
116 public:
117   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
118       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
119                         EmitPreInitStmt(S)) {}
120 };
121 
122 /// Lexical scope for OpenMP teams construct, that handles correct codegen
123 /// for captured expressions.
124 class OMPTeamsScope final : public OMPLexicalScope {
125   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
126     OpenMPDirectiveKind Kind = S.getDirectiveKind();
127     return !isOpenMPTargetExecutionDirective(Kind) &&
128            isOpenMPTeamsDirective(Kind);
129   }
130 
131 public:
132   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
133       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
134                         EmitPreInitStmt(S)) {}
135 };
136 
137 /// Private scope for OpenMP loop-based directives, that supports capturing
138 /// of used expression from loop statement.
139 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
140   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
141     const DeclStmt *PreInits;
142     CodeGenFunction::OMPMapVars PreCondVars;
143     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
144       llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
145       for (const auto *E : LD->counters()) {
146         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
147         EmittedAsPrivate.insert(VD->getCanonicalDecl());
148         (void)PreCondVars.setVarAddr(
149             CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
150       }
151       // Mark private vars as undefs.
152       for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
153         for (const Expr *IRef : C->varlists()) {
154           const auto *OrigVD =
155               cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
156           if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
157             QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
158             (void)PreCondVars.setVarAddr(
159                 CGF, OrigVD,
160                 Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
161                             CGF.getContext().getPointerType(OrigVDTy))),
162                         CGF.ConvertTypeForMem(OrigVDTy),
163                         CGF.getContext().getDeclAlign(OrigVD)));
164           }
165         }
166       }
167       (void)PreCondVars.apply(CGF);
168       // Emit init, __range and __end variables for C++ range loops.
169       (void)OMPLoopBasedDirective::doForAllLoops(
170           LD->getInnermostCapturedStmt()->getCapturedStmt(),
171           /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
172           [&CGF](unsigned Cnt, const Stmt *CurStmt) {
173             if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
174               if (const Stmt *Init = CXXFor->getInit())
175                 CGF.EmitStmt(Init);
176               CGF.EmitStmt(CXXFor->getRangeStmt());
177               CGF.EmitStmt(CXXFor->getEndStmt());
178             }
179             return false;
180           });
181       PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
182     } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
183       PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
184     } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
185       PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
186     } else {
187       llvm_unreachable("Unknown loop-based directive kind.");
188     }
189     if (PreInits) {
190       for (const auto *I : PreInits->decls())
191         CGF.EmitVarDecl(cast<VarDecl>(*I));
192     }
193     PreCondVars.restore(CGF);
194   }
195 
196 public:
197   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
198       : CodeGenFunction::RunCleanupsScope(CGF) {
199     emitPreInitStmt(CGF, S);
200   }
201 };
202 
203 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
204   CodeGenFunction::OMPPrivateScope InlinedShareds;
205 
206   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
207     return CGF.LambdaCaptureFields.lookup(VD) ||
208            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
209            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
210             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
211   }
212 
213 public:
214   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
215       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
216         InlinedShareds(CGF) {
217     for (const auto *C : S.clauses()) {
218       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
219         if (const auto *PreInit =
220                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
221           for (const auto *I : PreInit->decls()) {
222             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
223               CGF.EmitVarDecl(cast<VarDecl>(*I));
224             } else {
225               CodeGenFunction::AutoVarEmission Emission =
226                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
227               CGF.EmitAutoVarCleanups(Emission);
228             }
229           }
230         }
231       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
232         for (const Expr *E : UDP->varlists()) {
233           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
234           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
235             CGF.EmitVarDecl(*OED);
236         }
237       } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
238         for (const Expr *E : UDP->varlists()) {
239           const Decl *D = getBaseDecl(E);
240           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
241             CGF.EmitVarDecl(*OED);
242         }
243       }
244     }
245     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
246       CGF.EmitOMPPrivateClause(S, InlinedShareds);
247     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
248       if (const Expr *E = TG->getReductionRef())
249         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
250     }
251     // Temp copy arrays for inscan reductions should not be emitted as they are
252     // not used in simd only mode.
253     llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
254     for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
255       if (C->getModifier() != OMPC_REDUCTION_inscan)
256         continue;
257       for (const Expr *E : C->copy_array_temps())
258         CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
259     }
260     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
261     while (CS) {
262       for (auto &C : CS->captures()) {
263         if (C.capturesVariable() || C.capturesVariableByCopy()) {
264           auto *VD = C.getCapturedVar();
265           if (CopyArrayTemps.contains(VD))
266             continue;
267           assert(VD == VD->getCanonicalDecl() &&
268                  "Canonical decl must be captured.");
269           DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
270                           isCapturedVar(CGF, VD) ||
271                               (CGF.CapturedStmtInfo &&
272                                InlinedShareds.isGlobalVarCaptured(VD)),
273                           VD->getType().getNonReferenceType(), VK_LValue,
274                           C.getLocation());
275           InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
276         }
277       }
278       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
279     }
280     (void)InlinedShareds.Privatize();
281   }
282 };
283 
284 } // namespace
285 
// Forward declaration; the definition lies outside this section.
286 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
287                                          const OMPExecutableDirective &S,
288                                          const RegionCodeGenTy &CodeGen);
289 
290 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
291   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
292     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
293       OrigVD = OrigVD->getCanonicalDecl();
294       bool IsCaptured =
295           LambdaCaptureFields.lookup(OrigVD) ||
296           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
297           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
298       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
299                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
300       return EmitLValue(&DRE);
301     }
302   }
303   return EmitLValue(E);
304 }
305 
306 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
307   ASTContext &C = getContext();
308   llvm::Value *Size = nullptr;
309   auto SizeInChars = C.getTypeSizeInChars(Ty);
310   if (SizeInChars.isZero()) {
311     // getTypeSizeInChars() returns 0 for a VLA.
312     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
313       VlaSizePair VlaSize = getVLASize(VAT);
314       Ty = VlaSize.Type;
315       Size =
316           Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
317     }
318     SizeInChars = C.getTypeSizeInChars(Ty);
319     if (SizeInChars.isZero())
320       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
321     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
322   }
323   return CGM.getSize(SizeInChars);
324 }
325 
326 void CodeGenFunction::GenerateOpenMPCapturedVars(
327     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
328   const RecordDecl *RD = S.getCapturedRecordDecl();
329   auto CurField = RD->field_begin();
330   auto CurCap = S.captures().begin();
331   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
332                                                  E = S.capture_init_end();
333        I != E; ++I, ++CurField, ++CurCap) {
334     if (CurField->hasCapturedVLAType()) {
335       const VariableArrayType *VAT = CurField->getCapturedVLAType();
336       llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
337       CapturedVars.push_back(Val);
338     } else if (CurCap->capturesThis()) {
339       CapturedVars.push_back(CXXThisValue);
340     } else if (CurCap->capturesVariableByCopy()) {
341       llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
342 
343       // If the field is not a pointer, we need to save the actual value
344       // and load it as a void pointer.
345       if (!CurField->getType()->isAnyPointerType()) {
346         ASTContext &Ctx = getContext();
347         Address DstAddr = CreateMemTemp(
348             Ctx.getUIntPtrType(),
349             Twine(CurCap->getCapturedVar()->getName(), ".casted"));
350         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
351 
352         llvm::Value *SrcAddrVal = EmitScalarConversion(
353             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
354             Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
355         LValue SrcLV =
356             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
357 
358         // Store the value using the source type pointer.
359         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
360 
361         // Load the value using the destination type pointer.
362         CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
363       }
364       CapturedVars.push_back(CV);
365     } else {
366       assert(CurCap->capturesVariable() && "Expected capture by reference.");
367       CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
368     }
369   }
370 }
371 
372 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
373                                     QualType DstType, StringRef Name,
374                                     LValue AddrLV) {
375   ASTContext &Ctx = CGF.getContext();
376 
377   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
378       AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
379       Ctx.getPointerType(DstType), Loc);
380   Address TmpAddr =
381       CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
382   return TmpAddr;
383 }
384 
385 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
386   if (T->isLValueReferenceType())
387     return C.getLValueReferenceType(
388         getCanonicalParamType(C, T.getNonReferenceType()),
389         /*SpelledAsLValue=*/false);
390   if (T->isPointerType())
391     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
392   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
393     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
394       return getCanonicalParamType(C, VLA->getElementType());
395     if (!A->isVariablyModifiedType())
396       return C.getCanonicalType(T);
397   }
398   return C.getCanonicalParamType(T);
399 }
400 
401 namespace {
402 /// Contains required data for proper outlined function codegen.
403 struct FunctionOptions {
404   /// Captured statement for which the function is generated.
405   const CapturedStmt *S = nullptr;
406   /// true if cast to/from  UIntPtr is required for variables captured by
407   /// value.
408   const bool UIntPtrCastRequired = true;
409   /// true if only casted arguments must be registered as local args or VLA
410   /// sizes.
411   const bool RegisterCastedArgsOnly = false;
412   /// Name of the generated function.
413   const StringRef FunctionName;
414   /// Location of the non-debug version of the outlined function.
415   SourceLocation Loc;
416   explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
417                            bool RegisterCastedArgsOnly, StringRef FunctionName,
418                            SourceLocation Loc)
419       : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
420         RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
421         FunctionName(FunctionName), Loc(Loc) {}
422 };
423 } // namespace
424 
/// Creates the LLVM function for the captured statement described by \p FO and
/// starts emitting it in \p CGF via StartFunction. The function is not
/// finished here; the body and the FinishFunction call are left to the caller.
///
/// \param Args [out] Receives the parameter declarations: the captured decl's
///   parameters that precede the context parameter, then one parameter per
///   field of the captured record, then the parameters that follow the
///   context parameter.
/// \param LocalAddrs [out] Maps a per-capture parameter to the captured
///   variable (null for 'this') and the address to use for it inside the
///   function. Pointer by-copy captures and by-reference captures are only
///   recorded when FO.RegisterCastedArgsOnly is false.
/// \param VLASizes [out] Maps a parameter carrying a captured VLA size to the
///   size expression and the size value loaded from that parameter.
/// \param CXXThisValue [out] Reset to null on entry; set to the loaded 'this'
///   value when 'this' is among the captures.
/// \param FO Options selecting the captured statement, the function name, the
///   location and whether uintptr casts are used for by-value captures.
/// \returns The newly created function (internal linkage).
425 static llvm::Function *emitOutlinedFunctionPrologue(
426     CodeGenFunction &CGF, FunctionArgList &Args,
427     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
428         &LocalAddrs,
429     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
430         &VLASizes,
431     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
432   const CapturedDecl *CD = FO.S->getCapturedDecl();
433   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
434   assert(CD->hasBody() && "missing CapturedDecl body");
435 
436   CXXThisValue = nullptr;
437   // Build the argument list.
438   CodeGenModule &CGM = CGF.CGM;
439   ASTContext &Ctx = CGM.getContext();
440   FunctionArgList TargetArgs;
      // Parameters that precede the context parameter are taken over as they
      // are, in both Args and TargetArgs.
441   Args.append(CD->param_begin(),
442               std::next(CD->param_begin(), CD->getContextParamPosition()));
443   TargetArgs.append(
444       CD->param_begin(),
445       std::next(CD->param_begin(), CD->getContextParamPosition()));
446   auto I = FO.S->captures().begin();
      // Without uintptr casts, a synthetic function declaration is created; it
      // serves as the declaration context of the ParmVarDecls built below.
447   FunctionDecl *DebugFunctionDecl = nullptr;
448   if (!FO.UIntPtrCastRequired) {
449     FunctionProtoType::ExtProtoInfo EPI;
450     QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
451     DebugFunctionDecl = FunctionDecl::Create(
452         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
453         SourceLocation(), DeclarationName(), FunctionTy,
454         Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
455         /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
456         /*hasWrittenPrototype=*/false);
457   }
      // First pass: one parameter per field of the captured record. The
      // capture iterator I is advanced in step with the fields.
458   for (const FieldDecl *FD : RD->fields()) {
459     QualType ArgType = FD->getType();
460     IdentifierInfo *II = nullptr;
461     VarDecl *CapVar = nullptr;
462 
463     // If this is a capture by copy and the type is not a pointer, the outlined
464     // function argument type should be uintptr and the value properly casted to
465     // uintptr. This is necessary given that the runtime library is only able to
466     // deal with pointers. We can pass in the same way the VLA type sizes to the
467     // outlined function.
468     if (FO.UIntPtrCastRequired &&
469         ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
470          I->capturesVariableArrayType()))
471       ArgType = Ctx.getUIntPtrType();
472 
        // Name the parameter after the captured variable, or "this" / "vla".
473     if (I->capturesVariable() || I->capturesVariableByCopy()) {
474       CapVar = I->getCapturedVar();
475       II = CapVar->getIdentifier();
476     } else if (I->capturesThis()) {
477       II = &Ctx.Idents.get("this");
478     } else {
479       assert(I->capturesVariableArrayType());
480       II = &Ctx.Idents.get("vla");
481     }
482     if (ArgType->isVariablyModifiedType())
483       ArgType = getCanonicalParamType(Ctx, ArgType);
        // Kind of parameter declaration: thread-private implicit parameter for
        // TLS variables, a ParmVarDecl owned by DebugFunctionDecl when that
        // exists and a variable or 'this' is captured, else a plain implicit
        // parameter.
484     VarDecl *Arg;
485     if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
486       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
487                                       II, ArgType,
488                                       ImplicitParamDecl::ThreadPrivateVar);
489     } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
490       Arg = ParmVarDecl::Create(
491           Ctx, DebugFunctionDecl,
492           CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
493           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
494           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
495     } else {
496       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
497                                       II, ArgType, ImplicitParamDecl::Other);
498     }
499     Args.emplace_back(Arg);
500     // Do not cast arguments if we emit function with non-original types.
501     TargetArgs.emplace_back(
502         FO.UIntPtrCastRequired
503             ? Arg
504             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
505     ++I;
506   }
      // Parameters that follow the context parameter are appended unchanged.
507   Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
508               CD->param_end());
509   TargetArgs.append(
510       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
511       CD->param_end());
512 
513   // Create the function declaration.
514   const CGFunctionInfo &FuncInfo =
515       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
516   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
517 
518   auto *F =
519       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
520                              FO.FunctionName, &CGM.getModule());
521   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
522   if (CD->isNothrow())
523     F->setDoesNotThrow();
524   F->setDoesNotRecurse();
525 
526   // Always inline the outlined function if optimizations are enabled.
527   if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
528     F->removeFnAttr(llvm::Attribute::NoInline);
529     F->addFnAttr(llvm::Attribute::AlwaysInline);
530   }
531 
532   // Generate the function.
533   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
534                     FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
535                     FO.UIntPtrCastRequired ? FO.Loc
536                                            : CD->getBody()->getBeginLoc());
      // Second pass over the fields: work out the address to use for each
      // captured entity. Cnt indexes Args/TargetArgs, starting at the first
      // per-capture parameter (the position of the context parameter).
537   unsigned Cnt = CD->getContextParamPosition();
538   I = FO.S->captures().begin();
539   for (const FieldDecl *FD : RD->fields()) {
540     // Do not map arguments if we emit function with non-original types.
541     Address LocalAddr(Address::invalid());
542     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
543       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
544                                                              TargetArgs[Cnt]);
545     } else {
546       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
547     }
548     // If we are capturing a pointer by copy we don't need to do anything, just
549     // use the value that we get from the arguments.
550     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
551       const VarDecl *CurVD = I->getCapturedVar();
552       if (!FO.RegisterCastedArgsOnly)
553         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
554       ++Cnt;
555       ++I;
556       continue;
557     }
558 
559     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
560                                         AlignmentSource::Decl);
561     if (FD->hasCapturedVLAType()) {
          // VLA size: load it (through a cast from uintptr if required) and
          // record it together with its size expression.
562       if (FO.UIntPtrCastRequired) {
563         ArgLVal = CGF.MakeAddrLValue(
564             castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
565                                  Args[Cnt]->getName(), ArgLVal),
566             FD->getType(), AlignmentSource::Decl);
567       }
568       llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
569       const VariableArrayType *VAT = FD->getCapturedVLAType();
570       VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
571     } else if (I->capturesVariable()) {
          // By-reference capture: the parameter holds a reference or pointer
          // to the variable; load through it to get the variable's address.
572       const VarDecl *Var = I->getCapturedVar();
573       QualType VarTy = Var->getType();
574       Address ArgAddr = ArgLVal.getAddress(CGF);
575       if (ArgLVal.getType()->isLValueReferenceType()) {
576         ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
577       } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
578         assert(ArgLVal.getType()->isPointerType());
579         ArgAddr = CGF.EmitLoadOfPointer(
580             ArgAddr, ArgLVal.getType()->castAs<PointerType>());
581       }
582       if (!FO.RegisterCastedArgsOnly) {
583         LocalAddrs.insert(
584             {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
585       }
586     } else if (I->capturesVariableByCopy()) {
          // Non-pointer by-copy capture: always recorded, using the address
          // cast back from uintptr when casts are in use.
587       assert(!FD->getType()->isAnyPointerType() &&
588              "Not expecting a captured pointer.");
589       const VarDecl *Var = I->getCapturedVar();
590       LocalAddrs.insert({Args[Cnt],
591                          {Var, FO.UIntPtrCastRequired
592                                    ? castValueFromUintptr(
593                                          CGF, I->getLocation(), FD->getType(),
594                                          Args[Cnt]->getName(), ArgLVal)
595                                    : ArgLVal.getAddress(CGF)}});
596     } else {
597       // If 'this' is captured, load it into CXXThisValue.
598       assert(I->capturesThis());
599       CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
600       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
601     }
602     ++Cnt;
603     ++I;
604   }
605 
606   return F;
607 }
608 
/// Emits the outlined function for the captured statement \p S and returns it.
///
/// When debug info is requested (getDebugInfo() is set and the code-gen
/// options report hasReducedDebugInfo()), two functions are produced. The body
/// goes into a function named "<helper name>_debug__", whose prologue is built
/// without uintptr casts. A wrapper carrying the plain helper name, built with
/// uintptr casts, loads its arguments and calls the first function. In that
/// case the wrapper is returned; otherwise the single function holding the
/// body is returned.
///
/// \param S The captured statement to outline.
/// \param Loc Location passed on to the function options and to the emitted
///   call of the debug function.
609 llvm::Function *
610 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
611                                                     SourceLocation Loc) {
612   assert(
613       CapturedStmtInfo &&
614       "CapturedStmtInfo should be set when generating the captured function");
615   const CapturedDecl *CD = S.getCapturedDecl();
616   // Build the argument list.
617   bool NeedWrapperFunction =
618       getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
619   FunctionArgList Args;
620   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
621   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
622   SmallString<256> Buffer;
623   llvm::raw_svector_ostream Out(Buffer);
624   Out << CapturedStmtInfo->getHelperName();
625   if (NeedWrapperFunction)
626     Out << "_debug__";
      // UIntPtrCastRequired is the negation of NeedWrapperFunction: the
      // function holding the body skips the casts when a wrapper is emitted.
627   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
628                      Out.str(), Loc);
629   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
630                                                    VLASizes, CXXThisValue, FO);
      // Make the captured variables resolve to the addresses computed by the
      // prologue (entries without a variable, i.e. 'this', are skipped).
631   CodeGenFunction::OMPPrivateScope LocalScope(*this);
632   for (const auto &LocalAddrPair : LocalAddrs) {
633     if (LocalAddrPair.second.first) {
634       LocalScope.addPrivate(LocalAddrPair.second.first,
635                             LocalAddrPair.second.second);
636     }
637   }
638   (void)LocalScope.Privatize();
      // Record the VLA sizes received as arguments under their size expression.
639   for (const auto &VLASizePair : VLASizes)
640     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
641   PGO.assignRegionCounters(GlobalDecl(CD), F);
642   CapturedStmtInfo->EmitBody(*this, CD->getBody());
643   (void)LocalScope.ForceCleanup();
644   FinishFunction(CD->getBodyRBrace());
645   if (!NeedWrapperFunction)
646     return F;
647 
      // Emit the wrapper in a separate CodeGenFunction. The output containers
      // are cleared and refilled by the second prologue call.
648   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
649                             /*RegisterCastedArgsOnly=*/true,
650                             CapturedStmtInfo->getHelperName(), Loc);
651   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
652   WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
653   Args.clear();
654   LocalAddrs.clear();
655   VLASizes.clear();
656   llvm::Function *WrapperF =
657       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
658                                    WrapperCGF.CXXThisValue, WrapperFO);
      // Build the call arguments for F, one per wrapper parameter. PI walks
      // the LLVM arguments of F in step with Args.
659   llvm::SmallVector<llvm::Value *, 4> CallArgs;
660   auto *PI = F->arg_begin();
661   for (const auto *Arg : Args) {
662     llvm::Value *CallArg;
663     auto I = LocalAddrs.find(Arg);
664     if (I != LocalAddrs.end()) {
          // Registered by the prologue: load the value from the recorded
          // address.
665       LValue LV = WrapperCGF.MakeAddrLValue(
666           I->second.second,
667           I->second.first ? I->second.first->getType() : Arg->getType(),
668           AlignmentSource::Decl);
          // For complex types, retype the address to the type of the matching
          // argument of F before loading.
669       if (LV.getType()->isAnyComplexType())
670         LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
671             LV.getAddress(WrapperCGF),
672             PI->getType()->getPointerTo(
673                 LV.getAddress(WrapperCGF).getAddressSpace()),
674             PI->getType()));
675       CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
676     } else {
677       auto EI = VLASizes.find(Arg);
678       if (EI != VLASizes.end()) {
            // VLA size: forward the value already loaded by the prologue.
679         CallArg = EI->second.second;
680       } else {
            // Anything else: load the wrapper's own local copy of the
            // parameter.
681         LValue LV =
682             WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
683                                       Arg->getType(), AlignmentSource::Decl);
684         CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
685       }
686     }
687     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
688     ++PI;
689   }
690   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
691   WrapperCGF.FinishFunction();
692   return WrapperF;
693 }
694 
695 //===----------------------------------------------------------------------===//
696 //                              OpenMP Directive Emission
697 //===----------------------------------------------------------------------===//
698 void CodeGenFunction::EmitOMPAggregateAssign(
699     Address DestAddr, Address SrcAddr, QualType OriginalType,
700     const llvm::function_ref<void(Address, Address)> CopyGen) {
701   // Perform element-by-element initialization.
702   QualType ElementTy;
703 
704   // Drill down to the base element type on both arrays.
705   const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
706   llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
707   SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
708 
709   llvm::Value *SrcBegin = SrcAddr.getPointer();
710   llvm::Value *DestBegin = DestAddr.getPointer();
711   // Cast from pointer to array type to pointer to single element.
712   llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
713                                                    DestBegin, NumElements);
714 
715   // The basic structure here is a while-do loop.
716   llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
717   llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
718   llvm::Value *IsEmpty =
719       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
720   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
721 
722   // Enter the loop body, making that address the current address.
723   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
724   EmitBlock(BodyBB);
725 
726   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
727 
728   llvm::PHINode *SrcElementPHI =
729       Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
730   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
731   Address SrcElementCurrent =
732       Address(SrcElementPHI, SrcAddr.getElementType(),
733               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
734 
735   llvm::PHINode *DestElementPHI = Builder.CreatePHI(
736       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
737   DestElementPHI->addIncoming(DestBegin, EntryBB);
738   Address DestElementCurrent =
739       Address(DestElementPHI, DestAddr.getElementType(),
740               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
741 
742   // Emit copy.
743   CopyGen(DestElementCurrent, SrcElementCurrent);
744 
745   // Shift the address forward by one element.
746   llvm::Value *DestElementNext =
747       Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
748                                  /*Idx0=*/1, "omp.arraycpy.dest.element");
749   llvm::Value *SrcElementNext =
750       Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
751                                  /*Idx0=*/1, "omp.arraycpy.src.element");
752   // Check whether we've reached the end.
753   llvm::Value *Done =
754       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
755   Builder.CreateCondBr(Done, DoneBB, BodyBB);
756   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
757   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
758 
759   // Done.
760   EmitBlock(DoneBB, /*IsFinished=*/true);
761 }
762 
763 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
764                                   Address SrcAddr, const VarDecl *DestVD,
765                                   const VarDecl *SrcVD, const Expr *Copy) {
766   if (OriginalType->isArrayType()) {
767     const auto *BO = dyn_cast<BinaryOperator>(Copy);
768     if (BO && BO->getOpcode() == BO_Assign) {
769       // Perform simple memcpy for simple copying.
770       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
771       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
772       EmitAggregateAssign(Dest, Src, OriginalType);
773     } else {
774       // For arrays with complex element types perform element by element
775       // copying.
776       EmitOMPAggregateAssign(
777           DestAddr, SrcAddr, OriginalType,
778           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
779             // Working with the single array element, so have to remap
780             // destination and source variables to corresponding array
781             // elements.
782             CodeGenFunction::OMPPrivateScope Remap(*this);
783             Remap.addPrivate(DestVD, DestElement);
784             Remap.addPrivate(SrcVD, SrcElement);
785             (void)Remap.Privatize();
786             EmitIgnoredExpr(Copy);
787           });
788     }
789   } else {
790     // Remap pseudo source variable to private copy.
791     CodeGenFunction::OMPPrivateScope Remap(*this);
792     Remap.addPrivate(SrcVD, SrcAddr);
793     Remap.addPrivate(DestVD, DestAddr);
794     (void)Remap.Privatize();
795     // Emit copying of the whole variable.
796     EmitIgnoredExpr(Copy);
797   }
798 }
799 
800 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
801                                                 OMPPrivateScope &PrivateScope) {
802   if (!HaveInsertPoint())
803     return false;
804   bool DeviceConstTarget =
805       getLangOpts().OpenMPIsTargetDevice &&
806       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
807   bool FirstprivateIsLastprivate = false;
808   llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
809   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
810     for (const auto *D : C->varlists())
811       Lastprivates.try_emplace(
812           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
813           C->getKind());
814   }
815   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
816   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
817   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
818   // Force emission of the firstprivate copy if the directive does not emit
819   // outlined function, like omp for, omp simd, omp distribute etc.
820   bool MustEmitFirstprivateCopy =
821       CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
822   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
823     const auto *IRef = C->varlist_begin();
824     const auto *InitsRef = C->inits().begin();
825     for (const Expr *IInit : C->private_copies()) {
826       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
827       bool ThisFirstprivateIsLastprivate =
828           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
829       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
830       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
831       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
832           !FD->getType()->isReferenceType() &&
833           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
834         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
835         ++IRef;
836         ++InitsRef;
837         continue;
838       }
839       // Do not emit copy for firstprivate constant variables in target regions,
840       // captured by reference.
841       if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
842           FD && FD->getType()->isReferenceType() &&
843           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
844         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
845         ++IRef;
846         ++InitsRef;
847         continue;
848       }
849       FirstprivateIsLastprivate =
850           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
851       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
852         const auto *VDInit =
853             cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
854         bool IsRegistered;
855         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
856                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
857                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
858         LValue OriginalLVal;
859         if (!FD) {
860           // Check if the firstprivate variable is just a constant value.
861           ConstantEmission CE = tryEmitAsConstant(&DRE);
862           if (CE && !CE.isReference()) {
863             // Constant value, no need to create a copy.
864             ++IRef;
865             ++InitsRef;
866             continue;
867           }
868           if (CE && CE.isReference()) {
869             OriginalLVal = CE.getReferenceLValue(*this, &DRE);
870           } else {
871             assert(!CE && "Expected non-constant firstprivate.");
872             OriginalLVal = EmitLValue(&DRE);
873           }
874         } else {
875           OriginalLVal = EmitLValue(&DRE);
876         }
877         QualType Type = VD->getType();
878         if (Type->isArrayType()) {
879           // Emit VarDecl with copy init for arrays.
880           // Get the address of the original variable captured in current
881           // captured region.
882           AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
883           const Expr *Init = VD->getInit();
884           if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
885             // Perform simple memcpy.
886             LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
887             EmitAggregateAssign(Dest, OriginalLVal, Type);
888           } else {
889             EmitOMPAggregateAssign(
890                 Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
891                 Type,
892                 [this, VDInit, Init](Address DestElement, Address SrcElement) {
893                   // Clean up any temporaries needed by the
894                   // initialization.
895                   RunCleanupsScope InitScope(*this);
896                   // Emit initialization for single element.
897                   setAddrOfLocalVar(VDInit, SrcElement);
898                   EmitAnyExprToMem(Init, DestElement,
899                                    Init->getType().getQualifiers(),
900                                    /*IsInitializer*/ false);
901                   LocalDeclMap.erase(VDInit);
902                 });
903           }
904           EmitAutoVarCleanups(Emission);
905           IsRegistered =
906               PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
907         } else {
908           Address OriginalAddr = OriginalLVal.getAddress(*this);
909           // Emit private VarDecl with copy init.
910           // Remap temp VDInit variable to the address of the original
911           // variable (for proper handling of captured global variables).
912           setAddrOfLocalVar(VDInit, OriginalAddr);
913           EmitDecl(*VD);
914           LocalDeclMap.erase(VDInit);
915           Address VDAddr = GetAddrOfLocalVar(VD);
916           if (ThisFirstprivateIsLastprivate &&
917               Lastprivates[OrigVD->getCanonicalDecl()] ==
918                   OMPC_LASTPRIVATE_conditional) {
919             // Create/init special variable for lastprivate conditionals.
920             llvm::Value *V =
921                 EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
922                                                 AlignmentSource::Decl),
923                                  (*IRef)->getExprLoc());
924             VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
925                 *this, OrigVD);
926             EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
927                                                 AlignmentSource::Decl));
928             LocalDeclMap.erase(VD);
929             setAddrOfLocalVar(VD, VDAddr);
930           }
931           IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
932         }
933         assert(IsRegistered &&
934                "firstprivate var already registered as private");
935         // Silence the warning about unused variable.
936         (void)IsRegistered;
937       }
938       ++IRef;
939       ++InitsRef;
940     }
941   }
942   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
943 }
944 
945 void CodeGenFunction::EmitOMPPrivateClause(
946     const OMPExecutableDirective &D,
947     CodeGenFunction::OMPPrivateScope &PrivateScope) {
948   if (!HaveInsertPoint())
949     return;
950   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
951   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
952     auto IRef = C->varlist_begin();
953     for (const Expr *IInit : C->private_copies()) {
954       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
955       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
956         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
957         EmitDecl(*VD);
958         // Emit private VarDecl with copy init.
959         bool IsRegistered =
960             PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
961         assert(IsRegistered && "private var already registered as private");
962         // Silence the warning about unused variable.
963         (void)IsRegistered;
964       }
965       ++IRef;
966     }
967   }
968 }
969 
970 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
971   if (!HaveInsertPoint())
972     return false;
973   // threadprivate_var1 = master_threadprivate_var1;
974   // operator=(threadprivate_var2, master_threadprivate_var2);
975   // ...
976   // __kmpc_barrier(&loc, global_tid);
977   llvm::DenseSet<const VarDecl *> CopiedVars;
978   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
979   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
980     auto IRef = C->varlist_begin();
981     auto ISrcRef = C->source_exprs().begin();
982     auto IDestRef = C->destination_exprs().begin();
983     for (const Expr *AssignOp : C->assignment_ops()) {
984       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
985       QualType Type = VD->getType();
986       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
987         // Get the address of the master variable. If we are emitting code with
988         // TLS support, the address is passed from the master as field in the
989         // captured declaration.
990         Address MasterAddr = Address::invalid();
991         if (getLangOpts().OpenMPUseTLS &&
992             getContext().getTargetInfo().isTLSSupported()) {
993           assert(CapturedStmtInfo->lookup(VD) &&
994                  "Copyin threadprivates should have been captured!");
995           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
996                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
997           MasterAddr = EmitLValue(&DRE).getAddress(*this);
998           LocalDeclMap.erase(VD);
999         } else {
1000           MasterAddr =
1001               Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
1002                                           : CGM.GetAddrOfGlobal(VD),
1003                       CGM.getTypes().ConvertTypeForMem(VD->getType()),
1004                       getContext().getDeclAlign(VD));
1005         }
1006         // Get the address of the threadprivate variable.
1007         Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
1008         if (CopiedVars.size() == 1) {
1009           // At first check if current thread is a master thread. If it is, no
1010           // need to copy data.
1011           CopyBegin = createBasicBlock("copyin.not.master");
1012           CopyEnd = createBasicBlock("copyin.not.master.end");
1013           // TODO: Avoid ptrtoint conversion.
1014           auto *MasterAddrInt =
1015               Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1016           auto *PrivateAddrInt =
1017               Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
1018           Builder.CreateCondBr(
1019               Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1020               CopyEnd);
1021           EmitBlock(CopyBegin);
1022         }
1023         const auto *SrcVD =
1024             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1025         const auto *DestVD =
1026             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1027         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1028       }
1029       ++IRef;
1030       ++ISrcRef;
1031       ++IDestRef;
1032     }
1033   }
1034   if (CopyEnd) {
1035     // Exit out of copying procedure for non-master thread.
1036     EmitBlock(CopyEnd, /*IsFinished=*/true);
1037     return true;
1038   }
1039   return false;
1040 }
1041 
1042 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1043     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1044   if (!HaveInsertPoint())
1045     return false;
1046   bool HasAtLeastOneLastprivate = false;
1047   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1048   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1049     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1050     for (const Expr *C : LoopDirective->counters()) {
1051       SIMDLCVs.insert(
1052           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1053     }
1054   }
1055   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1056   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1057     HasAtLeastOneLastprivate = true;
1058     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1059         !getLangOpts().OpenMPSimd)
1060       break;
1061     const auto *IRef = C->varlist_begin();
1062     const auto *IDestRef = C->destination_exprs().begin();
1063     for (const Expr *IInit : C->private_copies()) {
1064       // Keep the address of the original variable for future update at the end
1065       // of the loop.
1066       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1067       // Taskloops do not require additional initialization, it is done in
1068       // runtime support library.
1069       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1070         const auto *DestVD =
1071             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1072         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1073                         /*RefersToEnclosingVariableOrCapture=*/
1074                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1075                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1076         PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
1077         // Check if the variable is also a firstprivate: in this case IInit is
1078         // not generated. Initialization of this variable will happen in codegen
1079         // for 'firstprivate' clause.
1080         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1081           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1082           Address VDAddr = Address::invalid();
1083           if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1084             VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1085                 *this, OrigVD);
1086             setAddrOfLocalVar(VD, VDAddr);
1087           } else {
1088             // Emit private VarDecl with copy init.
1089             EmitDecl(*VD);
1090             VDAddr = GetAddrOfLocalVar(VD);
1091           }
1092           bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
1093           assert(IsRegistered &&
1094                  "lastprivate var already registered as private");
1095           (void)IsRegistered;
1096         }
1097       }
1098       ++IRef;
1099       ++IDestRef;
1100     }
1101   }
1102   return HasAtLeastOneLastprivate;
1103 }
1104 
1105 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1106     const OMPExecutableDirective &D, bool NoFinals,
1107     llvm::Value *IsLastIterCond) {
1108   if (!HaveInsertPoint())
1109     return;
1110   // Emit following code:
1111   // if (<IsLastIterCond>) {
1112   //   orig_var1 = private_orig_var1;
1113   //   ...
1114   //   orig_varn = private_orig_varn;
1115   // }
1116   llvm::BasicBlock *ThenBB = nullptr;
1117   llvm::BasicBlock *DoneBB = nullptr;
1118   if (IsLastIterCond) {
1119     // Emit implicit barrier if at least one lastprivate conditional is found
1120     // and this is not a simd mode.
1121     if (!getLangOpts().OpenMPSimd &&
1122         llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1123                      [](const OMPLastprivateClause *C) {
1124                        return C->getKind() == OMPC_LASTPRIVATE_conditional;
1125                      })) {
1126       CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1127                                              OMPD_unknown,
1128                                              /*EmitChecks=*/false,
1129                                              /*ForceSimpleCall=*/true);
1130     }
1131     ThenBB = createBasicBlock(".omp.lastprivate.then");
1132     DoneBB = createBasicBlock(".omp.lastprivate.done");
1133     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1134     EmitBlock(ThenBB);
1135   }
1136   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1137   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1138   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1139     auto IC = LoopDirective->counters().begin();
1140     for (const Expr *F : LoopDirective->finals()) {
1141       const auto *D =
1142           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1143       if (NoFinals)
1144         AlreadyEmittedVars.insert(D);
1145       else
1146         LoopCountersAndUpdates[D] = F;
1147       ++IC;
1148     }
1149   }
1150   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1151     auto IRef = C->varlist_begin();
1152     auto ISrcRef = C->source_exprs().begin();
1153     auto IDestRef = C->destination_exprs().begin();
1154     for (const Expr *AssignOp : C->assignment_ops()) {
1155       const auto *PrivateVD =
1156           cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1157       QualType Type = PrivateVD->getType();
1158       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1159       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1160         // If lastprivate variable is a loop control variable for loop-based
1161         // directive, update its value before copyin back to original
1162         // variable.
1163         if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1164           EmitIgnoredExpr(FinalExpr);
1165         const auto *SrcVD =
1166             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1167         const auto *DestVD =
1168             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1169         // Get the address of the private variable.
1170         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1171         if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1172           PrivateAddr = Address(
1173               Builder.CreateLoad(PrivateAddr),
1174               CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
1175               CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1176         // Store the last value to the private copy in the last iteration.
1177         if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1178           CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1179               *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1180               (*IRef)->getExprLoc());
1181         // Get the address of the original variable.
1182         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1183         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1184       }
1185       ++IRef;
1186       ++ISrcRef;
1187       ++IDestRef;
1188     }
1189     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1190       EmitIgnoredExpr(PostUpdate);
1191   }
1192   if (IsLastIterCond)
1193     EmitBlock(DoneBB, /*IsFinished=*/true);
1194 }
1195 
1196 void CodeGenFunction::EmitOMPReductionClauseInit(
1197     const OMPExecutableDirective &D,
1198     CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1199   if (!HaveInsertPoint())
1200     return;
1201   SmallVector<const Expr *, 4> Shareds;
1202   SmallVector<const Expr *, 4> Privates;
1203   SmallVector<const Expr *, 4> ReductionOps;
1204   SmallVector<const Expr *, 4> LHSs;
1205   SmallVector<const Expr *, 4> RHSs;
1206   OMPTaskDataTy Data;
1207   SmallVector<const Expr *, 4> TaskLHSs;
1208   SmallVector<const Expr *, 4> TaskRHSs;
1209   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1210     if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1211       continue;
1212     Shareds.append(C->varlist_begin(), C->varlist_end());
1213     Privates.append(C->privates().begin(), C->privates().end());
1214     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1215     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1216     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1217     if (C->getModifier() == OMPC_REDUCTION_task) {
1218       Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1219       Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1220       Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1221       Data.ReductionOps.append(C->reduction_ops().begin(),
1222                                C->reduction_ops().end());
1223       TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1224       TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1225     }
1226   }
1227   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1228   unsigned Count = 0;
1229   auto *ILHS = LHSs.begin();
1230   auto *IRHS = RHSs.begin();
1231   auto *IPriv = Privates.begin();
1232   for (const Expr *IRef : Shareds) {
1233     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1234     // Emit private VarDecl with reduction init.
1235     RedCG.emitSharedOrigLValue(*this, Count);
1236     RedCG.emitAggregateType(*this, Count);
1237     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1238     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1239                              RedCG.getSharedLValue(Count).getAddress(*this),
1240                              [&Emission](CodeGenFunction &CGF) {
1241                                CGF.EmitAutoVarInit(Emission);
1242                                return true;
1243                              });
1244     EmitAutoVarCleanups(Emission);
1245     Address BaseAddr = RedCG.adjustPrivateAddress(
1246         *this, Count, Emission.getAllocatedAddress());
1247     bool IsRegistered =
1248         PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1249     assert(IsRegistered && "private var already registered as private");
1250     // Silence the warning about unused variable.
1251     (void)IsRegistered;
1252 
1253     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1254     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1255     QualType Type = PrivateVD->getType();
1256     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1257     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1258       // Store the address of the original variable associated with the LHS
1259       // implicit variable.
1260       PrivateScope.addPrivate(LHSVD,
1261                               RedCG.getSharedLValue(Count).getAddress(*this));
1262       PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1263     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1264                isa<ArraySubscriptExpr>(IRef)) {
1265       // Store the address of the original variable associated with the LHS
1266       // implicit variable.
1267       PrivateScope.addPrivate(LHSVD,
1268                               RedCG.getSharedLValue(Count).getAddress(*this));
1269       PrivateScope.addPrivate(RHSVD,
1270                               GetAddrOfLocalVar(PrivateVD).withElementType(
1271                                   ConvertTypeForMem(RHSVD->getType())));
1272     } else {
1273       QualType Type = PrivateVD->getType();
1274       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1275       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1276       // Store the address of the original variable associated with the LHS
1277       // implicit variable.
1278       if (IsArray) {
1279         OriginalAddr =
1280             OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
1281       }
1282       PrivateScope.addPrivate(LHSVD, OriginalAddr);
1283       PrivateScope.addPrivate(
1284           RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
1285                                ConvertTypeForMem(RHSVD->getType()))
1286                          : GetAddrOfLocalVar(PrivateVD));
1287     }
1288     ++ILHS;
1289     ++IRHS;
1290     ++IPriv;
1291     ++Count;
1292   }
1293   if (!Data.ReductionVars.empty()) {
1294     Data.IsReductionWithTaskMod = true;
1295     Data.IsWorksharingReduction =
1296         isOpenMPWorksharingDirective(D.getDirectiveKind());
1297     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1298         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1299     const Expr *TaskRedRef = nullptr;
1300     switch (D.getDirectiveKind()) {
1301     case OMPD_parallel:
1302       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1303       break;
1304     case OMPD_for:
1305       TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1306       break;
1307     case OMPD_sections:
1308       TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1309       break;
1310     case OMPD_parallel_for:
1311       TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1312       break;
1313     case OMPD_parallel_master:
1314       TaskRedRef =
1315           cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1316       break;
1317     case OMPD_parallel_sections:
1318       TaskRedRef =
1319           cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1320       break;
1321     case OMPD_target_parallel:
1322       TaskRedRef =
1323           cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1324       break;
1325     case OMPD_target_parallel_for:
1326       TaskRedRef =
1327           cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1328       break;
1329     case OMPD_distribute_parallel_for:
1330       TaskRedRef =
1331           cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1332       break;
1333     case OMPD_teams_distribute_parallel_for:
1334       TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1335                        .getTaskReductionRefExpr();
1336       break;
1337     case OMPD_target_teams_distribute_parallel_for:
1338       TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1339                        .getTaskReductionRefExpr();
1340       break;
1341     case OMPD_simd:
1342     case OMPD_for_simd:
1343     case OMPD_section:
1344     case OMPD_single:
1345     case OMPD_master:
1346     case OMPD_critical:
1347     case OMPD_parallel_for_simd:
1348     case OMPD_task:
1349     case OMPD_taskyield:
1350     case OMPD_error:
1351     case OMPD_barrier:
1352     case OMPD_taskwait:
1353     case OMPD_taskgroup:
1354     case OMPD_flush:
1355     case OMPD_depobj:
1356     case OMPD_scan:
1357     case OMPD_ordered:
1358     case OMPD_atomic:
1359     case OMPD_teams:
1360     case OMPD_target:
1361     case OMPD_cancellation_point:
1362     case OMPD_cancel:
1363     case OMPD_target_data:
1364     case OMPD_target_enter_data:
1365     case OMPD_target_exit_data:
1366     case OMPD_taskloop:
1367     case OMPD_taskloop_simd:
1368     case OMPD_master_taskloop:
1369     case OMPD_master_taskloop_simd:
1370     case OMPD_parallel_master_taskloop:
1371     case OMPD_parallel_master_taskloop_simd:
1372     case OMPD_distribute:
1373     case OMPD_target_update:
1374     case OMPD_distribute_parallel_for_simd:
1375     case OMPD_distribute_simd:
1376     case OMPD_target_parallel_for_simd:
1377     case OMPD_target_simd:
1378     case OMPD_teams_distribute:
1379     case OMPD_teams_distribute_simd:
1380     case OMPD_teams_distribute_parallel_for_simd:
1381     case OMPD_target_teams:
1382     case OMPD_target_teams_distribute:
1383     case OMPD_target_teams_distribute_parallel_for_simd:
1384     case OMPD_target_teams_distribute_simd:
1385     case OMPD_declare_target:
1386     case OMPD_end_declare_target:
1387     case OMPD_threadprivate:
1388     case OMPD_allocate:
1389     case OMPD_declare_reduction:
1390     case OMPD_declare_mapper:
1391     case OMPD_declare_simd:
1392     case OMPD_requires:
1393     case OMPD_declare_variant:
1394     case OMPD_begin_declare_variant:
1395     case OMPD_end_declare_variant:
1396     case OMPD_unknown:
1397     default:
1398       llvm_unreachable("Enexpected directive with task reductions.");
1399     }
1400 
1401     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1402     EmitVarDecl(*VD);
1403     EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1404                       /*Volatile=*/false, TaskRedRef->getType());
1405   }
1406 }
1407 
1408 void CodeGenFunction::EmitOMPReductionClauseFinal(
1409     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1410   if (!HaveInsertPoint())
1411     return;
1412   llvm::SmallVector<const Expr *, 8> Privates;
1413   llvm::SmallVector<const Expr *, 8> LHSExprs;
1414   llvm::SmallVector<const Expr *, 8> RHSExprs;
1415   llvm::SmallVector<const Expr *, 8> ReductionOps;
1416   bool HasAtLeastOneReduction = false;
1417   bool IsReductionWithTaskMod = false;
1418   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1419     // Do not emit for inscan reductions.
1420     if (C->getModifier() == OMPC_REDUCTION_inscan)
1421       continue;
1422     HasAtLeastOneReduction = true;
1423     Privates.append(C->privates().begin(), C->privates().end());
1424     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1425     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1426     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1427     IsReductionWithTaskMod =
1428         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1429   }
1430   if (HasAtLeastOneReduction) {
1431     if (IsReductionWithTaskMod) {
1432       CGM.getOpenMPRuntime().emitTaskReductionFini(
1433           *this, D.getBeginLoc(),
1434           isOpenMPWorksharingDirective(D.getDirectiveKind()));
1435     }
1436     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1437                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1438                       ReductionKind == OMPD_simd;
1439     bool SimpleReduction = ReductionKind == OMPD_simd;
1440     // Emit nowait reduction if nowait clause is present or directive is a
1441     // parallel directive (it always has implicit barrier).
1442     CGM.getOpenMPRuntime().emitReduction(
1443         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1444         {WithNowait, SimpleReduction, ReductionKind});
1445   }
1446 }
1447 
1448 static void emitPostUpdateForReductionClause(
1449     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1450     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1451   if (!CGF.HaveInsertPoint())
1452     return;
1453   llvm::BasicBlock *DoneBB = nullptr;
1454   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1455     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1456       if (!DoneBB) {
1457         if (llvm::Value *Cond = CondGen(CGF)) {
1458           // If the first post-update expression is found, emit conditional
1459           // block if it was requested.
1460           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1461           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1462           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1463           CGF.EmitBlock(ThenBB);
1464         }
1465       }
1466       CGF.EmitIgnoredExpr(PostUpdate);
1467     }
1468   }
1469   if (DoneBB)
1470     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1471 }
1472 
1473 namespace {
1474 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1475 /// parallel function. This is necessary for combined constructs such as
1476 /// 'distribute parallel for'
1477 typedef llvm::function_ref<void(CodeGenFunction &,
1478                                 const OMPExecutableDirective &,
1479                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1480     CodeGenBoundParametersTy;
1481 } // anonymous namespace
1482 
1483 static void
1484 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1485                                      const OMPExecutableDirective &S) {
1486   if (CGF.getLangOpts().OpenMP < 50)
1487     return;
1488   llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1489   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1490     for (const Expr *Ref : C->varlists()) {
1491       if (!Ref->getType()->isScalarType())
1492         continue;
1493       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1494       if (!DRE)
1495         continue;
1496       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1497       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1498     }
1499   }
1500   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1501     for (const Expr *Ref : C->varlists()) {
1502       if (!Ref->getType()->isScalarType())
1503         continue;
1504       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1505       if (!DRE)
1506         continue;
1507       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1508       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1509     }
1510   }
1511   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1512     for (const Expr *Ref : C->varlists()) {
1513       if (!Ref->getType()->isScalarType())
1514         continue;
1515       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1516       if (!DRE)
1517         continue;
1518       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1519       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1520     }
1521   }
1522   // Privates should ne analyzed since they are not captured at all.
1523   // Task reductions may be skipped - tasks are ignored.
1524   // Firstprivates do not return value but may be passed by reference - no need
1525   // to check for updated lastprivate conditional.
1526   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1527     for (const Expr *Ref : C->varlists()) {
1528       if (!Ref->getType()->isScalarType())
1529         continue;
1530       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1531       if (!DRE)
1532         continue;
1533       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1534     }
1535   }
1536   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1537       CGF, S, PrivateDecls);
1538 }
1539 
// Shared lowering of the 'parallel' part of directive \p S.
//
// The OMPD_parallel captured statement of \p S is outlined through the OpenMP
// runtime with \p CodeGen as the region body generator. The num_threads and
// proc_bind clauses are emitted when present, the applicable 'if' clause
// condition is selected, and finally the runtime parallel call is emitted.
// \p CodeGenBoundParameters is invoked on the (still empty) argument list
// before the captured variables are appended to it.
1540 static void emitCommonOMPParallelDirective(
1541     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1542     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1543     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1544   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1545   llvm::Value *NumThreads = nullptr;
1546   llvm::Function *OutlinedFn =
1547       CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1548           CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1549           CodeGen);
1550   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1551     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1552     NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1553                                     /*IgnoreResultAssign=*/true);
1554     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1555         CGF, NumThreads, NumThreadsClause->getBeginLoc());
1556   }
1557   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1558     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1559     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1560         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1561   }
  // Pick the first 'if' clause that either has no directive-name modifier or
  // is explicitly addressed to 'parallel'; IfCond stays null if there is none.
1562   const Expr *IfCond = nullptr;
1563   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1564     if (C->getNameModifier() == OMPD_unknown ||
1565         C->getNameModifier() == OMPD_parallel) {
1566       IfCond = C->getCondition();
1567       break;
1568     }
1569   }
1570
1571   OMPParallelScope Scope(CGF, S);
1572   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1573   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1574   // lower and upper bounds with the pragma 'for' chunking mechanism.
1575   // The following lambda takes care of appending the lower and upper bound
1576   // parameters when necessary
1577   CodeGenBoundParameters(CGF, S, CapturedVars);
1578   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1579   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1580                                               CapturedVars, IfCond, NumThreads);
1581 }
1582 
1583 static bool isAllocatableDecl(const VarDecl *VD) {
1584   const VarDecl *CVD = VD->getCanonicalDecl();
1585   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1586     return false;
1587   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1588   // Use the default allocation.
1589   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1590             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1591            !AA->getAllocator());
1592 }
1593 
// Bound-parameter callback that appends nothing. It matches the signature of
// CodeGenBoundParametersTy and is passed by callers that have no extra bound
// values to add to the outlined function's arguments.
1594 static void emitEmptyBoundParameters(CodeGenFunction &,
1595                                      const OMPExecutableDirective &,
1596                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1597 
1598 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1599                                 const OMPExecutableDirective &S) {
1600   bool Copyins = CGF.EmitOMPCopyinClause(S);
1601   if (Copyins) {
1602     // Emit implicit barrier to synchronize threads and avoid data races on
1603     // propagation master's thread values of threadprivate variables to local
1604     // instances of that variables of all other implicit threads.
1605     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1606         CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1607         /*ForceSimpleCall=*/true);
1608   }
1609 }
1610 
// Returns the address of storage for local variable \p VD obtained through
// OpenMPIRBuilder::createOMPAlloc, or Address::invalid() when \p VD is null
// or isAllocatableDecl() rejects its canonical declaration.
//
// The allocation size is the type size rounded up to the declaration's
// alignment. The matching free call is created here as well and handed to an
// OMPAllocateCleanupTy cleanup that is pushed for both normal and EH exits.
1611 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1612     CodeGenFunction &CGF, const VarDecl *VD) {
1613   CodeGenModule &CGM = CGF.CGM;
1614   auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1615
1616   if (!VD)
1617     return Address::invalid();
1618   const VarDecl *CVD = VD->getCanonicalDecl();
1619   if (!isAllocatableDecl(CVD))
1620     return Address::invalid();
1621   llvm::Value *Size;
1622   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  // Variably modified types need the size computed (and aligned) with emitted
  // IR; for all other types it is folded into a constant.
1623   if (CVD->getType()->isVariablyModifiedType()) {
1624     Size = CGF.getTypeSize(CVD->getType());
1625     // Align the size: ((size + align - 1) / align) * align
1626     Size = CGF.Builder.CreateNUWAdd(
1627         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1628     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1629     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1630   } else {
1631     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1632     Size = CGM.getSize(Sz.alignTo(Align));
1633   }
1634
1635   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1636   assert(AA->getAllocator() &&
1637          "Expected allocator expression for non-default allocator.");
1638   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1639   // According to the standard, the original allocator type is a enum (integer).
1640   // Convert to pointer type, if required.
1641   if (Allocator->getType()->isIntegerTy())
1642     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1643   else if (Allocator->getType()->isPointerTy())
1644     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1645                                                                 CGM.VoidPtrTy);
1646
1647   llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1648       CGF.Builder, Size, Allocator,
1649       getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1650   llvm::CallInst *FreeCI =
1651       OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1652
1653   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  // Cast the allocated pointer to a pointer to the variable's memory type
  // before wrapping it in an Address.
1654   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1655       Addr,
1656       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1657       getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1658   return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
1659 }
1660 
1661 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1662     CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1663     SourceLocation Loc) {
1664   CodeGenModule &CGM = CGF.CGM;
1665   if (CGM.getLangOpts().OpenMPUseTLS &&
1666       CGM.getContext().getTargetInfo().isTLSSupported())
1667     return VDAddr;
1668 
1669   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1670 
1671   llvm::Type *VarTy = VDAddr.getElementType();
1672   llvm::Value *Data =
1673       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1674   llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1675   std::string Suffix = getNameWithSeparators({"cache", ""});
1676   llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1677 
1678   llvm::CallInst *ThreadPrivateCacheCall =
1679       OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1680 
1681   return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1682 }
1683 
1684 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1685     ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1686   SmallString<128> Buffer;
1687   llvm::raw_svector_ostream OS(Buffer);
1688   StringRef Sep = FirstSeparator;
1689   for (StringRef Part : Parts) {
1690     OS << Sep << Part;
1691     Sep = Separator;
1692   }
1693   return OS.str().str();
1694 }
1695 
1696 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1697     CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1698     InsertPointTy CodeGenIP, Twine RegionName) {
1699   CGBuilderTy &Builder = CGF.Builder;
1700   Builder.restoreIP(CodeGenIP);
1701   llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1702                                                "." + RegionName + ".after");
1703 
1704   {
1705     OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1706     CGF.EmitStmt(RegionBodyStmt);
1707   }
1708 
1709   if (Builder.saveIP().isSet())
1710     Builder.CreateBr(FiniBB);
1711 }
1712 
1713 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1714     CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1715     InsertPointTy CodeGenIP, Twine RegionName) {
1716   CGBuilderTy &Builder = CGF.Builder;
1717   Builder.restoreIP(CodeGenIP);
1718   llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1719                                                "." + RegionName + ".after");
1720 
1721   {
1722     OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1723     CGF.EmitStmt(RegionBodyStmt);
1724   }
1725 
1726   if (Builder.saveIP().isSet())
1727     Builder.CreateBr(FiniBB);
1728 }
1729 
// Emits code for the 'parallel' directive \p S.
//
// When LangOpts.OpenMPIRBuilder is set, the region is built by
// OpenMPIRBuilder::createParallel and the function returns early. Otherwise
// the region is emitted through emitCommonOMPParallelDirective with a body
// generator that handles the copyin, firstprivate, private and reduction
// clauses, followed by the reduction post-updates and the check for an outer
// lastprivate conditional update.
1730 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1731   if (CGM.getLangOpts().OpenMPIRBuilder) {
1732     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1733     // Check if we have any if clause associated with the directive.
1734     llvm::Value *IfCond = nullptr;
1735     if (const auto *C = S.getSingleClause<OMPIfClause>())
1736       IfCond = EmitScalarExpr(C->getCondition(),
1737                               /*IgnoreResultAssign=*/true);
1738
1739     llvm::Value *NumThreads = nullptr;
1740     if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1741       NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1742                                   /*IgnoreResultAssign=*/true);
1743
1744     ProcBindKind ProcBind = OMP_PROC_BIND_default;
1745     if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1746       ProcBind = ProcBindClause->getProcBindKind();
1747
1748     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1749
1750     // The cleanup callback that finalizes all variables at the given location,
1751     // thus calls destructors etc.
1752     auto FiniCB = [this](InsertPointTy IP) {
1753       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1754     };
1755
1756     // Privatization callback that performs appropriate action for
1757     // shared/private/firstprivate/lastprivate/copyin/... variables.
1758     //
1759     // TODO: This defaults to shared right now.
1760     auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1761                      llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1762       // The next line is appropriate only for variables (Val) with the
1763       // data-sharing attribute "shared".
1764       ReplVal = &Val;
1765
1766       return CodeGenIP;
1767     };
1768
1769     const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1770     const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1771
    // Body callback: emits the statement captured by the 'parallel' region as
    // an outlined region body named "parallel".
1772     auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1773                                InsertPointTy CodeGenIP) {
1774       OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1775           *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
1776     };
1777
1778     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1779     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1780     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1781         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    // Continue code generation at the insertion point returned by
    // createParallel.
1782     Builder.restoreIP(
1783         OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1784                                   IfCond, NumThreads, ProcBind, S.hasCancel()));
1785     return;
1786   }
1787
1788   // Emit parallel region as a standalone region.
1789   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1790     Action.Enter(CGF);
1791     OMPPrivateScope PrivateScope(CGF);
1792     emitOMPCopyinClause(CGF, S);
1793     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1794     CGF.EmitOMPPrivateClause(S, PrivateScope);
1795     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1796     (void)PrivateScope.Privatize();
1797     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1798     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1799   };
1800   {
    // LPCRegion is scoped to this block, so it is released before the check
    // for an outer lastprivate conditional update below.
1801     auto LPCRegion =
1802         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1803     emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1804                                    emitEmptyBoundParameters);
    // A null condition means the post-updates are emitted unconditionally.
1805     emitPostUpdateForReductionClause(*this, S,
1806                                      [](CodeGenFunction &) { return nullptr; });
1807   }
1808   // Check for outer lastprivate conditional update.
1809   checkForLastprivateConditionalUpdate(*this, S);
1810 }
1811 
// Emits a 'metadirective' by emitting the statement returned by
// S.getIfStmt(); no other code is generated for the directive here.
1812 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1813   EmitStmt(S.getIfStmt());
1814 }
1815 
1816 namespace {
1817 /// RAII to handle scopes for loop transformation directives.
1818 class OMPTransformDirectiveScopeRAII {
1819   OMPLoopScope *Scope = nullptr;
1820   CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1821   CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1822 
1823   OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1824       delete;
1825   OMPTransformDirectiveScopeRAII &
1826   operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1827 
1828 public:
1829   OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1830     if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1831       Scope = new OMPLoopScope(CGF, *Dir);
1832       CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1833       CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1834     }
1835   }
1836   ~OMPTransformDirectiveScopeRAII() {
1837     if (!Scope)
1838       return;
1839     delete CapInfoRAII;
1840     delete CGSI;
1841     delete Scope;
1842   }
1843 };
1844 } // namespace
1845 
// Recursively emits the body of the loop nest associated with a loop
// directive.
//
// \p S is the statement to emit, \p NextLoop the loop statement that is
// expected to appear next inside \p S, \p MaxLevel the number of loops in the
// nest and \p Level the current nesting depth. When \p NextLoop is reached,
// only its body is emitted here (plus the loop variable declaration for a
// range-based for); any other statement is emitted unchanged.
1846 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1847                      int MaxLevel, int Level = 0) {
1848   assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1849   const Stmt *SimplifiedS = S->IgnoreContainers();
  // Compound statements are walked statement by statement (at the same
  // level) so that NextLoop can be recognized among their children.
1850   if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1851     PrettyStackTraceLoc CrashInfo(
1852         CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1853         "LLVM IR generation of compound statement ('{}')");
1854
1855     // Keep track of the current cleanup stack depth, including debug scopes.
1856     CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1857     for (const Stmt *CurStmt : CS->body())
1858       emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1859     return;
1860   }
1861   if (SimplifiedS == NextLoop) {
    // Look through a loop transformation directive and a canonical loop
    // wrapper to reach the underlying for / range-based for statement.
1862     if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1863       SimplifiedS = Dir->getTransformedStmt();
1864     if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1865       SimplifiedS = CanonLoop->getLoopStmt();
1866     if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1867       S = For->getBody();
1868     } else {
1869       assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1870              "Expected canonical for loop or range-based for loop.");
1871       const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1872       CGF.EmitStmt(CXXFor->getLoopVarStmt());
1873       S = CXXFor->getBody();
1874     }
    // More loops of the nest remain: continue with the next inner loop.
    // Otherwise fall through and emit the innermost body below.
1875     if (Level + 1 < MaxLevel) {
1876       NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1877           S, /*TryImperfectlyNestedLoops=*/true);
1878       emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1879       return;
1880     }
1881   }
1882   CGF.EmitStmt(S);
1883 }
1884 
// Emits one iteration of the loop nest associated with loop directive \p D.
//
// In order: the counter updates, the linear-variable updates (skipped for
// distribute directives), the non-rectangular iteration-space checks, the
// inscan reduction setup, the loop body itself and finally the
// "omp.body.continue" block. \p LoopExit is the break destination pushed on
// the break/continue stack while the body is emitted.
1885 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1886                                       JumpDest LoopExit) {
1887   RunCleanupsScope BodyScope(*this);
1888   // Update counters values on current iteration.
1889   for (const Expr *UE : D.updates())
1890     EmitIgnoredExpr(UE);
1891   // Update the linear variables.
1892   // In distribute directives only loop counters may be marked as linear, no
1893   // need to generate the code for them.
1894   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1895     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1896       for (const Expr *UE : C->updates())
1897         EmitIgnoredExpr(UE);
1898     }
1899   }
1900
1901   // On a continue in the body, jump to the end.
1902   JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1903   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1904   for (const Expr *E : D.finals_conditions()) {
1905     if (!E)
1906       continue;
1907     // Check that loop counter in non-rectangular nest fits into the iteration
1908     // space.
1909     llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1910     EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1911                          getProfileCount(D.getBody()));
1912     EmitBlock(NextBB);
1913   }
1914
  // Privatize inscan reduction variables; Privatize() tells whether this
  // loop body is an inscan region.
1915   OMPPrivateScope InscanScope(*this);
1916   EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1917   bool IsInscanRegion = InscanScope.Privatize();
1918   if (IsInscanRegion) {
1919     // Need to remember the block before and after scan directive
1920     // to dispatch them correctly depending on the clause used in
1921     // this directive, inclusive or exclusive. For inclusive scan the natural
1922     // order of the blocks is used, for exclusive clause the blocks must be
1923     // executed in reverse order.
1924     OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1925     OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1926     // No need to allocate inscan exit block, in simd mode it is selected in the
1927     // codegen for the scan directive.
1928     if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1929       OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1930     OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1931     EmitBranch(OMPScanDispatch);
1932     EmitBlock(OMPBeforeScanBlock);
1933   }
1934
1935   // Emit loop variables for C++ range loops.
1936   const Stmt *Body =
1937       D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1938   // Emit loop body.
1939   emitBody(*this, Body,
1940            OMPLoopBasedDirective::tryToFindNextInnerLoop(
1941                Body, /*TryImperfectlyNestedLoops=*/true),
1942            D.getLoopsNumber());
1943
1944   // Jump to the dispatcher at the end of the loop body.
1945   if (IsInscanRegion)
1946     EmitBranch(OMPScanExitBlock);
1947
1948   // The end (updates/cleanups).
1949   EmitBlock(Continue.getBlock());
1950   BreakContinueStack.pop_back();
1951 }
1952 
// An emitted closure: the generated function (first) together with the
// pointer to its captured context (second).
1953 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1954 
1955 /// Emit a captured statement and return the function as well as its captured
1956 /// closure context.
1957 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1958                                              const CapturedStmt *S) {
1959   LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1960   CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1961   std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1962       std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1963   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1964   llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1965 
1966   return {F, CapStruct.getPointer(ParentCGF)};
1967 }
1968 
1969 /// Emit a call to a previously captured closure.
1970 static llvm::CallInst *
1971 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1972                      llvm::ArrayRef<llvm::Value *> Args) {
1973   // Append the closure context to the argument.
1974   SmallVector<llvm::Value *> EffectiveArgs;
1975   EffectiveArgs.reserve(Args.size() + 1);
1976   llvm::append_range(EffectiveArgs, Args);
1977   EffectiveArgs.push_back(Cap.second);
1978 
1979   return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1980 }
1981 
1982 llvm::CanonicalLoopInfo *
1983 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1984   assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1985 
1986   // The caller is processing the loop-associated directive processing the \p
1987   // Depth loops nested in \p S. Put the previous pending loop-associated
1988   // directive to the stack. If the current loop-associated directive is a loop
1989   // transformation directive, it will push its generated loops onto the stack
1990   // such that together with the loops left here they form the combined loop
1991   // nest for the parent loop-associated directive.
1992   int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1993   ExpectedOMPLoopDepth = Depth;
1994 
1995   EmitStmt(S);
1996   assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1997 
1998   // The last added loop is the outermost one.
1999   llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2000 
2001   // Pop the \p Depth loops requested by the call from that stack and restore
2002   // the previous context.
2003   OMPLoopNestStack.pop_back_n(Depth);
2004   ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2005 
2006   return Result;
2007 }
2008 
// Emits an OMPCanonicalLoop.
//
// Without LangOpts.OpenMPIRBuilder the wrapped loop statement is emitted as
// is. Otherwise the loop's init statements are emitted, the distance and
// loop-variable closures are generated, the trip count is obtained by calling
// the distance closure, and the loop skeleton is created through
// OpenMPIRBuilder::createCanonicalLoop. The resulting CanonicalLoopInfo is
// pushed onto OMPLoopNestStack for the enclosing directive.
2009 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2010   const Stmt *SyntacticalLoop = S->getLoopStmt();
2011   if (!getLangOpts().OpenMPIRBuilder) {
2012     // Ignore if OpenMPIRBuilder is not enabled.
2013     EmitStmt(SyntacticalLoop);
2014     return;
2015   }
2016
2017   LexicalScope ForScope(*this, S->getSourceRange());
2018
2019   // Emit init statements. The Distance/LoopVar funcs may reference variable
2020   // declarations they contain.
  // BodyStmt is assigned on both loop kinds; any other statement kind is
  // unreachable.
2021   const Stmt *BodyStmt;
2022   if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
2023     if (const Stmt *InitStmt = For->getInit())
2024       EmitStmt(InitStmt);
2025     BodyStmt = For->getBody();
2026   } else if (const auto *RangeFor =
2027                  dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
2028     if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2029       EmitStmt(RangeStmt);
2030     if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2031       EmitStmt(BeginStmt);
2032     if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2033       EmitStmt(EndStmt);
2034     if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2035       EmitStmt(LoopVarStmt);
2036     BodyStmt = RangeFor->getBody();
2037   } else
2038     llvm_unreachable("Expected for-stmt or range-based for-stmt");
2039
2040   // Emit closure for later use. By-value captures will be captured here.
2041   const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2042   EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
2043   const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2044   EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
2045
2046   // Call the distance function to get the number of iterations of the loop to
2047   // come.
  // The logical iteration type is the non-reference type of the distance
  // function's first parameter; the count is returned through a temporary.
2048   QualType LogicalTy = DistanceFunc->getCapturedDecl()
2049                            ->getParam(0)
2050                            ->getType()
2051                            .getNonReferenceType();
2052   Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2053   emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2054   llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2055
2056   // Emit the loop structure.
2057   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2058   auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2059                            llvm::Value *IndVar) {
2060     Builder.restoreIP(CodeGenIP);
2061
2062     // Emit the loop body: Convert the logical iteration number to the loop
2063     // variable and emit the body.
2064     const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2065     LValue LCVal = EmitLValue(LoopVarRef);
2066     Address LoopVarAddress = LCVal.getAddress(*this);
2067     emitCapturedStmtCall(*this, LoopVarClosure,
2068                          {LoopVarAddress.getPointer(), IndVar});
2069
2070     RunCleanupsScope BodyScope(*this);
2071     EmitStmt(BodyStmt);
2072   };
2073   llvm::CanonicalLoopInfo *CL =
2074       OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2075
2076   // Finish up the loop.
2077   Builder.restoreIP(CL->getAfterIP());
2078   ForScope.ForceCleanup();
2079
2080   // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2081   OMPLoopNestStack.push_back(CL);
2082 }
2083 
2084 void CodeGenFunction::EmitOMPInnerLoop(
2085     const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2086     const Expr *IncExpr,
2087     const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2088     const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2089   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2090 
2091   // Start the loop with a block that tests the condition.
2092   auto CondBlock = createBasicBlock("omp.inner.for.cond");
2093   EmitBlock(CondBlock);
2094   const SourceRange R = S.getSourceRange();
2095 
2096   // If attributes are attached, push to the basic block with them.
2097   const auto &OMPED = cast<OMPExecutableDirective>(S);
2098   const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2099   const Stmt *SS = ICS->getCapturedStmt();
2100   const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2101   OMPLoopNestStack.clear();
2102   if (AS)
2103     LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2104                    AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2105                    SourceLocToDebugLoc(R.getEnd()));
2106   else
2107     LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2108                    SourceLocToDebugLoc(R.getEnd()));
2109 
2110   // If there are any cleanups between here and the loop-exit scope,
2111   // create a block to stage a loop exit along.
2112   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2113   if (RequiresCleanup)
2114     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2115 
2116   llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2117 
2118   // Emit condition.
2119   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2120   if (ExitBlock != LoopExit.getBlock()) {
2121     EmitBlock(ExitBlock);
2122     EmitBranchThroughCleanup(LoopExit);
2123   }
2124 
2125   EmitBlock(LoopBody);
2126   incrementProfileCounter(&S);
2127 
2128   // Create a block for the increment.
2129   JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2130   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2131 
2132   BodyGen(*this);
2133 
2134   // Emit "IV = IV + 1" and a back-edge to the condition block.
2135   EmitBlock(Continue.getBlock());
2136   EmitIgnoredExpr(IncExpr);
2137   PostIncGen(*this);
2138   BreakContinueStack.pop_back();
2139   EmitBranch(CondBlock);
2140   LoopStack.pop();
2141   // Emit the fall-through block.
2142   EmitBlock(LoopExit.getBlock());
2143 }
2144 
2145 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2146   if (!HaveInsertPoint())
2147     return false;
2148   // Emit inits for the linear variables.
2149   bool HasLinears = false;
2150   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2151     for (const Expr *Init : C->inits()) {
2152       HasLinears = true;
2153       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2154       if (const auto *Ref =
2155               dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2156         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2157         const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2158         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2159                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
2160                         VD->getInit()->getType(), VK_LValue,
2161                         VD->getInit()->getExprLoc());
2162         EmitExprAsInit(
2163             &DRE, VD,
2164             MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2165             /*capturedByInit=*/false);
2166         EmitAutoVarCleanups(Emission);
2167       } else {
2168         EmitVarDecl(*VD);
2169       }
2170     }
2171     // Emit the linear steps for the linear clauses.
2172     // If a step is not constant, it is pre-calculated before the loop.
2173     if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2174       if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2175         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2176         // Emit calculation of the linear step.
2177         EmitIgnoredExpr(CS);
2178       }
2179   }
2180   return HasLinears;
2181 }
2182 
2183 void CodeGenFunction::EmitOMPLinearClauseFinal(
2184     const OMPLoopDirective &D,
2185     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2186   if (!HaveInsertPoint())
2187     return;
2188   llvm::BasicBlock *DoneBB = nullptr;
2189   // Emit the final values of the linear variables.
2190   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2191     auto IC = C->varlist_begin();
2192     for (const Expr *F : C->finals()) {
2193       if (!DoneBB) {
2194         if (llvm::Value *Cond = CondGen(*this)) {
2195           // If the first post-update expression is found, emit conditional
2196           // block if it was requested.
2197           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2198           DoneBB = createBasicBlock(".omp.linear.pu.done");
2199           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2200           EmitBlock(ThenBB);
2201         }
2202       }
2203       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2204       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2205                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
2206                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2207       Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2208       CodeGenFunction::OMPPrivateScope VarScope(*this);
2209       VarScope.addPrivate(OrigVD, OrigAddr);
2210       (void)VarScope.Privatize();
2211       EmitIgnoredExpr(F);
2212       ++IC;
2213     }
2214     if (const Expr *PostUpdate = C->getPostUpdateExpr())
2215       EmitIgnoredExpr(PostUpdate);
2216   }
2217   if (DoneBB)
2218     EmitBlock(DoneBB, /*IsFinished=*/true);
2219 }
2220 
2221 static void emitAlignedClause(CodeGenFunction &CGF,
2222                               const OMPExecutableDirective &D) {
2223   if (!CGF.HaveInsertPoint())
2224     return;
2225   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2226     llvm::APInt ClauseAlignment(64, 0);
2227     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2228       auto *AlignmentCI =
2229           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2230       ClauseAlignment = AlignmentCI->getValue();
2231     }
2232     for (const Expr *E : Clause->varlists()) {
2233       llvm::APInt Alignment(ClauseAlignment);
2234       if (Alignment == 0) {
2235         // OpenMP [2.8.1, Description]
2236         // If no optional parameter is specified, implementation-defined default
2237         // alignments for SIMD instructions on the target platforms are assumed.
2238         Alignment =
2239             CGF.getContext()
2240                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2241                     E->getType()->getPointeeType()))
2242                 .getQuantity();
2243       }
2244       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2245              "alignment is not power of 2");
2246       if (Alignment != 0) {
2247         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2248         CGF.emitAlignmentAssumption(
2249             PtrValue, E, /*No second loc needed*/ SourceLocation(),
2250             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2251       }
2252     }
2253   }
2254 }
2255 
2256 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2257     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2258   if (!HaveInsertPoint())
2259     return;
2260   auto I = S.private_counters().begin();
2261   for (const Expr *E : S.counters()) {
2262     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2263     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2264     // Emit var without initialization.
2265     AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2266     EmitAutoVarCleanups(VarEmission);
2267     LocalDeclMap.erase(PrivateVD);
2268     (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
2269     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2270         VD->hasGlobalStorage()) {
2271       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2272                       LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2273                       E->getType(), VK_LValue, E->getExprLoc());
2274       (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
2275     } else {
2276       (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
2277     }
2278     ++I;
2279   }
2280   // Privatize extra loop counters used in loops for ordered(n) clauses.
2281   for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2282     if (!C->getNumForLoops())
2283       continue;
2284     for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2285          I < E; ++I) {
2286       const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2287       const auto *VD = cast<VarDecl>(DRE->getDecl());
2288       // Override only those variables that can be captured to avoid re-emission
2289       // of the variables declared within the loops.
2290       if (DRE->refersToEnclosingVariableOrCapture()) {
2291         (void)LoopScope.addPrivate(
2292             VD, CreateMemTemp(DRE->getType(), VD->getName()));
2293       }
2294     }
2295   }
2296 }
2297 
2298 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2299                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
2300                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2301   if (!CGF.HaveInsertPoint())
2302     return;
2303   {
2304     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2305     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2306     (void)PreCondScope.Privatize();
2307     // Get initial values of real counters.
2308     for (const Expr *I : S.inits()) {
2309       CGF.EmitIgnoredExpr(I);
2310     }
2311   }
2312   // Create temp loop control variables with their init values to support
2313   // non-rectangular loops.
2314   CodeGenFunction::OMPMapVars PreCondVars;
2315   for (const Expr *E : S.dependent_counters()) {
2316     if (!E)
2317       continue;
2318     assert(!E->getType().getNonReferenceType()->isRecordType() &&
2319            "dependent counter must not be an iterator.");
2320     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2321     Address CounterAddr =
2322         CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2323     (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2324   }
2325   (void)PreCondVars.apply(CGF);
2326   for (const Expr *E : S.dependent_inits()) {
2327     if (!E)
2328       continue;
2329     CGF.EmitIgnoredExpr(E);
2330   }
2331   // Check that loop is executed at least one time.
2332   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2333   PreCondVars.restore(CGF);
2334 }
2335 
2336 void CodeGenFunction::EmitOMPLinearClause(
2337     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2338   if (!HaveInsertPoint())
2339     return;
2340   llvm::DenseSet<const VarDecl *> SIMDLCVs;
2341   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2342     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2343     for (const Expr *C : LoopDirective->counters()) {
2344       SIMDLCVs.insert(
2345           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2346     }
2347   }
2348   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2349     auto CurPrivate = C->privates().begin();
2350     for (const Expr *E : C->varlists()) {
2351       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2352       const auto *PrivateVD =
2353           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2354       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2355         // Emit private VarDecl with copy init.
2356         EmitVarDecl(*PrivateVD);
2357         bool IsRegistered =
2358             PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2359         assert(IsRegistered && "linear var already registered as private");
2360         // Silence the warning about unused variable.
2361         (void)IsRegistered;
2362       } else {
2363         EmitVarDecl(*PrivateVD);
2364       }
2365       ++CurPrivate;
2366     }
2367   }
2368 }
2369 
2370 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2371                                      const OMPExecutableDirective &D) {
2372   if (!CGF.HaveInsertPoint())
2373     return;
2374   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2375     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2376                                  /*ignoreResult=*/true);
2377     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2378     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2379     // In presence of finite 'safelen', it may be unsafe to mark all
2380     // the memory instructions parallel, because loop-carried
2381     // dependences of 'safelen' iterations are possible.
2382     CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2383   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2384     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2385                                  /*ignoreResult=*/true);
2386     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2387     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2388     // In presence of finite 'safelen', it may be unsafe to mark all
2389     // the memory instructions parallel, because loop-carried
2390     // dependences of 'safelen' iterations are possible.
2391     CGF.LoopStack.setParallel(/*Enable=*/false);
2392   }
2393 }
2394 
2395 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2396   // Walk clauses and process safelen/lastprivate.
2397   LoopStack.setParallel(/*Enable=*/true);
2398   LoopStack.setVectorizeEnable();
2399   emitSimdlenSafelenClause(*this, D);
2400   if (const auto *C = D.getSingleClause<OMPOrderClause>())
2401     if (C->getKind() == OMPC_ORDER_concurrent)
2402       LoopStack.setParallel(/*Enable=*/true);
2403   if ((D.getDirectiveKind() == OMPD_simd ||
2404        (getLangOpts().OpenMPSimd &&
2405         isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2406       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2407                    [](const OMPReductionClause *C) {
2408                      return C->getModifier() == OMPC_REDUCTION_inscan;
2409                    }))
2410     // Disable parallel access in case of prefix sum.
2411     LoopStack.setParallel(/*Enable=*/false);
2412 }
2413 
2414 void CodeGenFunction::EmitOMPSimdFinal(
2415     const OMPLoopDirective &D,
2416     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2417   if (!HaveInsertPoint())
2418     return;
2419   llvm::BasicBlock *DoneBB = nullptr;
2420   auto IC = D.counters().begin();
2421   auto IPC = D.private_counters().begin();
2422   for (const Expr *F : D.finals()) {
2423     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2424     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2425     const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2426     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2427         OrigVD->hasGlobalStorage() || CED) {
2428       if (!DoneBB) {
2429         if (llvm::Value *Cond = CondGen(*this)) {
2430           // If the first post-update expression is found, emit conditional
2431           // block if it was requested.
2432           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2433           DoneBB = createBasicBlock(".omp.final.done");
2434           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2435           EmitBlock(ThenBB);
2436         }
2437       }
2438       Address OrigAddr = Address::invalid();
2439       if (CED) {
2440         OrigAddr =
2441             EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2442       } else {
2443         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2444                         /*RefersToEnclosingVariableOrCapture=*/false,
2445                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2446         OrigAddr = EmitLValue(&DRE).getAddress(*this);
2447       }
2448       OMPPrivateScope VarScope(*this);
2449       VarScope.addPrivate(OrigVD, OrigAddr);
2450       (void)VarScope.Privatize();
2451       EmitIgnoredExpr(F);
2452     }
2453     ++IC;
2454     ++IPC;
2455   }
2456   if (DoneBB)
2457     EmitBlock(DoneBB, /*IsFinished=*/true);
2458 }
2459 
2460 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2461                                          const OMPLoopDirective &S,
2462                                          CodeGenFunction::JumpDest LoopExit) {
2463   CGF.EmitOMPLoopBody(S, LoopExit);
2464   CGF.EmitStopPoint(&S);
2465 }
2466 
2467 /// Emit a helper variable and return corresponding lvalue.
2468 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2469                                const DeclRefExpr *Helper) {
2470   auto VDecl = cast<VarDecl>(Helper->getDecl());
2471   CGF.EmitVarDecl(*VDecl);
2472   return CGF.EmitLValue(Helper);
2473 }
2474 
2475 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2476                                const RegionCodeGenTy &SimdInitGen,
2477                                const RegionCodeGenTy &BodyCodeGen) {
2478   auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2479                                                     PrePostActionTy &) {
2480     CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2481     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2482     SimdInitGen(CGF);
2483 
2484     BodyCodeGen(CGF);
2485   };
2486   auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2487     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2488     CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2489 
2490     BodyCodeGen(CGF);
2491   };
2492   const Expr *IfCond = nullptr;
2493   if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2494     for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2495       if (CGF.getLangOpts().OpenMP >= 50 &&
2496           (C->getNameModifier() == OMPD_unknown ||
2497            C->getNameModifier() == OMPD_simd)) {
2498         IfCond = C->getCondition();
2499         break;
2500       }
2501     }
2502   }
2503   if (IfCond) {
2504     CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2505   } else {
2506     RegionCodeGenTy ThenRCG(ThenGen);
2507     ThenRCG(CGF);
2508   }
2509 }
2510 
2511 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2512                               PrePostActionTy &Action) {
2513   Action.Enter(CGF);
2514   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2515          "Expected simd directive");
2516   OMPLoopScope PreInitScope(CGF, S);
2517   // if (PreCond) {
2518   //   for (IV in 0..LastIteration) BODY;
2519   //   <Final counter/linear vars updates>;
2520   // }
2521   //
2522   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2523       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2524       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2525     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2526     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2527   }
2528 
2529   // Emit: if (PreCond) - begin.
2530   // If the condition constant folds and can be elided, avoid emitting the
2531   // whole loop.
2532   bool CondConstant;
2533   llvm::BasicBlock *ContBlock = nullptr;
2534   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2535     if (!CondConstant)
2536       return;
2537   } else {
2538     llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2539     ContBlock = CGF.createBasicBlock("simd.if.end");
2540     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2541                 CGF.getProfileCount(&S));
2542     CGF.EmitBlock(ThenBlock);
2543     CGF.incrementProfileCounter(&S);
2544   }
2545 
2546   // Emit the loop iteration variable.
2547   const Expr *IVExpr = S.getIterationVariable();
2548   const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2549   CGF.EmitVarDecl(*IVDecl);
2550   CGF.EmitIgnoredExpr(S.getInit());
2551 
2552   // Emit the iterations count variable.
2553   // If it is not a variable, Sema decided to calculate iterations count on
2554   // each iteration (e.g., it is foldable into a constant).
2555   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2556     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2557     // Emit calculation of the iterations count.
2558     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2559   }
2560 
2561   emitAlignedClause(CGF, S);
2562   (void)CGF.EmitOMPLinearClauseInit(S);
2563   {
2564     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2565     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2566     CGF.EmitOMPLinearClause(S, LoopScope);
2567     CGF.EmitOMPPrivateClause(S, LoopScope);
2568     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2569     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2570         CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2571     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2572     (void)LoopScope.Privatize();
2573     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2574       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2575 
2576     emitCommonSimdLoop(
2577         CGF, S,
2578         [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2579           CGF.EmitOMPSimdInit(S);
2580         },
2581         [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2582           CGF.EmitOMPInnerLoop(
2583               S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2584               [&S](CodeGenFunction &CGF) {
2585                 emitOMPLoopBodyWithStopPoint(CGF, S,
2586                                              CodeGenFunction::JumpDest());
2587               },
2588               [](CodeGenFunction &) {});
2589         });
2590     CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2591     // Emit final copy of the lastprivate variables at the end of loops.
2592     if (HasLastprivateClause)
2593       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2594     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2595     emitPostUpdateForReductionClause(CGF, S,
2596                                      [](CodeGenFunction &) { return nullptr; });
2597     LoopScope.restoreMap();
2598     CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2599   }
2600   // Emit: if (PreCond) - end.
2601   if (ContBlock) {
2602     CGF.EmitBranch(ContBlock);
2603     CGF.EmitBlock(ContBlock, true);
2604   }
2605 }
2606 
2607 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2608   // Check for unsupported clauses
2609   for (OMPClause *C : S.clauses()) {
2610     // Currently only order, simdlen and safelen clauses are supported
2611     if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2612           isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2613       return false;
2614   }
2615 
2616   // Check if we have a statement with the ordered directive.
2617   // Visit the statement hierarchy to find a compound statement
2618   // with a ordered directive in it.
2619   if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2620     if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2621       for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2622         if (!SubStmt)
2623           continue;
2624         if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2625           for (const Stmt *CSSubStmt : CS->children()) {
2626             if (!CSSubStmt)
2627               continue;
2628             if (isa<OMPOrderedDirective>(CSSubStmt)) {
2629               return false;
2630             }
2631           }
2632         }
2633       }
2634     }
2635   }
2636   return true;
2637 }
2638 static llvm::MapVector<llvm::Value *, llvm::Value *>
2639 GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2640   llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2641   for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2642     llvm::APInt ClauseAlignment(64, 0);
2643     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2644       auto *AlignmentCI =
2645           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2646       ClauseAlignment = AlignmentCI->getValue();
2647     }
2648     for (const Expr *E : Clause->varlists()) {
2649       llvm::APInt Alignment(ClauseAlignment);
2650       if (Alignment == 0) {
2651         // OpenMP [2.8.1, Description]
2652         // If no optional parameter is specified, implementation-defined default
2653         // alignments for SIMD instructions on the target platforms are assumed.
2654         Alignment =
2655             CGF.getContext()
2656                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2657                     E->getType()->getPointeeType()))
2658                 .getQuantity();
2659       }
2660       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2661              "alignment is not power of 2");
2662       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2663       AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2664     }
2665   }
2666   return AlignedVars;
2667 }
2668 
2669 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2670   bool UseOMPIRBuilder =
2671       CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2672   if (UseOMPIRBuilder) {
2673     auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2674                                                           PrePostActionTy &) {
2675       // Use the OpenMPIRBuilder if enabled.
2676       if (UseOMPIRBuilder) {
2677         llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2678             GetAlignedMapping(S, CGF);
2679         // Emit the associated statement and get its loop representation.
2680         const Stmt *Inner = S.getRawStmt();
2681         llvm::CanonicalLoopInfo *CLI =
2682             EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2683 
2684         llvm::OpenMPIRBuilder &OMPBuilder =
2685             CGM.getOpenMPRuntime().getOMPBuilder();
2686         // Add SIMD specific metadata
2687         llvm::ConstantInt *Simdlen = nullptr;
2688         if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2689           RValue Len =
2690               this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2691                                 /*ignoreResult=*/true);
2692           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2693           Simdlen = Val;
2694         }
2695         llvm::ConstantInt *Safelen = nullptr;
2696         if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2697           RValue Len =
2698               this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2699                                 /*ignoreResult=*/true);
2700           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2701           Safelen = Val;
2702         }
2703         llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2704         if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2705           if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
2706             Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2707           }
2708         }
2709         // Add simd metadata to the collapsed loop. Do not generate
2710         // another loop for if clause. Support for if clause is done earlier.
2711         OMPBuilder.applySimd(CLI, AlignedVars,
2712                              /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2713         return;
2714       }
2715     };
2716     {
2717       auto LPCRegion =
2718           CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2719       OMPLexicalScope Scope(*this, S, OMPD_unknown);
2720       CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2721                                                   CodeGenIRBuilder);
2722     }
2723     return;
2724   }
2725 
2726   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2727   OMPFirstScanLoop = true;
2728   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2729     emitOMPSimdRegion(CGF, S, Action);
2730   };
2731   {
2732     auto LPCRegion =
2733         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2734     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2735     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2736   }
2737   // Check for outer lastprivate conditional update.
2738   checkForLastprivateConditionalUpdate(*this, S);
2739 }
2740 
2741 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2742   // Emit the de-sugared statement.
2743   OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2744   EmitStmt(S.getTransformedStmt());
2745 }
2746 
/// Emit code for an OpenMP unroll directive.
///
/// With the OpenMPIRBuilder language option enabled, the nested loop is
/// emitted as a canonical loop and handed to the OpenMPIRBuilder, which is
/// asked for full, partial or heuristic unrolling depending on which clause
/// is present. Otherwise the requested unroll state/count is recorded on
/// LoopStack and the associated statement is emitted.
2747 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2748   bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2749 
2750   if (UseOMPIRBuilder) {
2751     auto DL = SourceLocToDebugLoc(S.getBeginLoc());
2752     const Stmt *Inner = S.getRawStmt();
2753 
2754     // Consume nested loop. Clear the entire remaining loop stack because a
2755     // fully unrolled loop is non-transformable. For partial unrolling the
2756     // generated outer loop is pushed back to the stack.
2757     llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2758     OMPLoopNestStack.clear();
2759 
2760     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2761 
       // The unrolled loop is only requested back from the builder when
       // ExpectedOMPLoopDepth is at least 1.
2762     bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2763     llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2764 
2765     if (S.hasClausesOfKind<OMPFullClause>()) {
2766       assert(ExpectedOMPLoopDepth == 0);
2767       OMPBuilder.unrollLoopFull(DL, CLI);
2768     } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
         // Factor stays 0 when the 'partial' clause has no factor expression.
         // NOTE(review): presumably 0 lets the builder choose the factor --
         // confirm against OpenMPIRBuilder::unrollLoopPartial.
2769       uint64_t Factor = 0;
2770       if (Expr *FactorExpr = PartialClause->getFactor()) {
2771         Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2772         assert(Factor >= 1 && "Only positive factors are valid");
2773       }
2774       OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
2775                                    NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2776     } else {
         // Neither 'full' nor 'partial' was specified.
2777       OMPBuilder.unrollLoopHeuristic(DL, CLI);
2778     }
2779 
2780     assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2781            "NeedsUnrolledCLI implies UnrolledCLI to be set");
2782     if (UnrolledCLI)
2783       OMPLoopNestStack.push_back(UnrolledCLI);
2784 
2785     return;
2786   }
2787 
2788   // This function is only called if the unrolled loop is not consumed by any
2789   // other loop-associated construct. Such a loop-associated construct will have
2790   // used the transformed AST.
2791 
2792   // Set the unroll metadata for the next emitted loop.
2793   LoopStack.setUnrollState(LoopAttributes::Enable);
2794 
     // A 'full' clause replaces the state set above; a 'partial' clause with a
     // factor expression additionally records the unroll count. A 'partial'
     // clause without a factor leaves the state set above unchanged.
2795   if (S.hasClausesOfKind<OMPFullClause>()) {
2796     LoopStack.setUnrollState(LoopAttributes::Full);
2797   } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2798     if (Expr *FactorExpr = PartialClause->getFactor()) {
2799       uint64_t Factor =
2800           FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2801       assert(Factor >= 1 && "Only positive factors are valid");
2802       LoopStack.setUnrollCount(Factor);
2803     }
2804   }
2805 
2806   EmitStmt(S.getAssociatedStmt());
2807 }
2808 
/// Emit the outer ("dispatch") loop that iterates over chunks of a loop
/// directive and runs the inner loop for each chunk.
///
/// The emitted control flow uses the blocks omp.dispatch.cond,
/// omp.dispatch.body, omp.dispatch.inc and omp.dispatch.end, plus
/// omp.dispatch.cleanup when \p LoopScope requires cleanups.
///
/// \param DynamicOrOrdered If true, the loop condition is the value returned
///        by RT.emitForNext() and "IV = LB" (LoopArgs.Init) is emitted at the
///        top of the body. If false, LoopArgs.EUB, LoopArgs.Init and
///        LoopArgs.Cond are evaluated in the condition block,
///        LoopArgs.NextLB/NextUB are emitted after each chunk, and
///        emitForStaticFinish() is called on exit.
/// \param IsMonotonic For non-simd directives the loop is marked parallel on
///        LoopStack when this is false (an 'order(concurrent)' clause forces
///        it on regardless).
/// \param S The loop directive being emitted.
/// \param LoopScope Only queried for requiresCleanups().
/// \param LoopArgs Bound/stride addresses and the loop expressions to emit.
/// \param CodeGenLoop Callback that emits the loop body; invoked as
///        CodeGenLoop(CGF, S, LoopExit).
/// \param CodeGenOrdered Callback invoked as
///        CodeGenOrdered(CGF, Loc, IVSize, IVSigned) from the last callback
///        passed to EmitOMPInnerLoop.
2809 void CodeGenFunction::EmitOMPOuterLoop(
2810     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2811     CodeGenFunction::OMPPrivateScope &LoopScope,
2812     const CodeGenFunction::OMPLoopArguments &LoopArgs,
2813     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2814     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2815   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2816 
2817   const Expr *IVExpr = S.getIterationVariable();
2818   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2819   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2820 
2821   JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2822 
2823   // Start the loop with a block that tests the condition.
2824   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2825   EmitBlock(CondBlock);
2826   const SourceRange R = S.getSourceRange();
2827   OMPLoopNestStack.clear();
2828   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2829                  SourceLocToDebugLoc(R.getEnd()));
2830 
2831   llvm::Value *BoolCondVal = nullptr;
2832   if (!DynamicOrOrdered) {
2833     // UB = min(UB, GlobalUB) or
2834     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2835     // 'distribute parallel for')
2836     EmitIgnoredExpr(LoopArgs.EUB);
2837     // IV = LB
2838     EmitIgnoredExpr(LoopArgs.Init);
2839     // IV < UB
2840     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2841   } else {
       // The runtime call's result is used directly as the loop condition.
2842     BoolCondVal =
2843         RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2844                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2845   }
2846 
2847   // If there are any cleanups between here and the loop-exit scope,
2848   // create a block to stage a loop exit along.
2849   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2850   if (LoopScope.requiresCleanups())
2851     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2852 
2853   llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2854   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2855   if (ExitBlock != LoopExit.getBlock()) {
2856     EmitBlock(ExitBlock);
2857     EmitBranchThroughCleanup(LoopExit);
2858   }
2859   EmitBlock(LoopBody);
2860 
2861   // Emit "IV = LB" (in case of static schedule, we have already calculated new
2862   // LB for loop condition and emitted it above).
2863   if (DynamicOrOrdered)
2864     EmitIgnoredExpr(LoopArgs.Init);
2865 
2866   // Create a block for the increment.
2867   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2868   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2869 
     // First lambda: loop metadata / simd initialization. Second lambda: the
     // inner loop over the current chunk.
2870   emitCommonSimdLoop(
2871       *this, S,
2872       [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2873         // Generate !llvm.loop.parallel metadata for loads and stores for loops
2874         // with dynamic/guided scheduling and without ordered clause.
2875         if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2876           CGF.LoopStack.setParallel(!IsMonotonic);
2877           if (const auto *C = S.getSingleClause<OMPOrderClause>())
2878             if (C->getKind() == OMPC_ORDER_concurrent)
2879               CGF.LoopStack.setParallel(/*Enable=*/true);
2880         } else {
2881           CGF.EmitOMPSimdInit(S);
2882         }
2883       },
2884       [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2885        &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2886         SourceLocation Loc = S.getBeginLoc();
2887         // when 'distribute' is not combined with a 'for':
2888         // while (idx <= UB) { BODY; ++idx; }
2889         // when 'distribute' is combined with a 'for'
2890         // (e.g. 'distribute parallel for')
2891         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2892         CGF.EmitOMPInnerLoop(
2893             S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2894             [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2895               CodeGenLoop(CGF, S, LoopExit);
2896             },
2897             [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2898               CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2899             });
2900       });
2901 
2902   EmitBlock(Continue.getBlock());
2903   BreakContinueStack.pop_back();
2904   if (!DynamicOrOrdered) {
2905     // Emit "LB = LB + Stride", "UB = UB + Stride".
2906     EmitIgnoredExpr(LoopArgs.NextLB);
2907     EmitIgnoredExpr(LoopArgs.NextUB);
2908   }
2909 
     // Back edge to the condition block.
2910   EmitBranch(CondBlock);
2911   OMPLoopNestStack.clear();
2912   LoopStack.pop();
2913   // Emit the fall-through block.
2914   EmitBlock(LoopExit.getBlock());
2915 
2916   // Tell the runtime we are done. The static-finish call is only emitted for
     // the non-dynamic, non-ordered case; otherwise the callback does nothing.
2917   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2918     if (!DynamicOrOrdered)
2919       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2920                                                      S.getDirectiveKind());
2921   };
2922   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2923 }
2924 
/// Emit the outer loop of a worksharing loop whose schedule needs one.
///
/// Emits the runtime initialization call (emitForDispatchInit when the loop is
/// ordered or RT.isDynamic() holds for the schedule, emitForStaticInit
/// otherwise) and then delegates to EmitOMPOuterLoop, using the Inc, Init,
/// Cond, NextLowerBound and NextUpperBound expressions of \p S.
///
/// \param ScheduleKind Schedule kind and modifiers of the loop.
/// \param IsMonotonic Forwarded unchanged to EmitOMPOuterLoop.
/// \param S The loop directive being emitted.
/// \param LoopScope Forwarded unchanged to EmitOMPOuterLoop.
/// \param Ordered Whether the loop is ordered; forces the dispatch path and
///        makes the per-iteration callback call emitForOrderedIterationEnd.
/// \param LoopArgs Addresses of LB/UB/ST/IL plus Chunk and EUB.
/// \param CGDispatchBounds Callback producing the (LB, UB) values passed to
///        emitForDispatchInit; only invoked on the dispatch path.
2925 void CodeGenFunction::EmitOMPForOuterLoop(
2926     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2927     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2928     const OMPLoopArguments &LoopArgs,
2929     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2930   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2931 
2932   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2933   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2934 
2935   assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2936                                             LoopArgs.Chunk != nullptr)) &&
2937          "static non-chunked schedule does not need outer loop");
2938 
2939   // Emit outer loop.
2940   //
2941   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2942   // When schedule(dynamic,chunk_size) is specified, the iterations are
2943   // distributed to threads in the team in chunks as the threads request them.
2944   // Each thread executes a chunk of iterations, then requests another chunk,
2945   // until no chunks remain to be distributed. Each chunk contains chunk_size
2946   // iterations, except for the last chunk to be distributed, which may have
2947   // fewer iterations. When no chunk_size is specified, it defaults to 1.
2948   //
2949   // When schedule(guided,chunk_size) is specified, the iterations are assigned
2950   // to threads in the team in chunks as the executing threads request them.
2951   // Each thread executes a chunk of iterations, then requests another chunk,
2952   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2953   // each chunk is proportional to the number of unassigned iterations divided
2954   // by the number of threads in the team, decreasing to 1. For a chunk_size
2955   // with value k (greater than 1), the size of each chunk is determined in the
2956   // same way, with the restriction that the chunks do not contain fewer than k
2957   // iterations (except for the last chunk to be assigned, which may have fewer
2958   // than k iterations).
2959   //
2960   // When schedule(auto) is specified, the decision regarding scheduling is
2961   // delegated to the compiler and/or runtime system. The programmer gives the
2962   // implementation the freedom to choose any possible mapping of iterations to
2963   // threads in the team.
2964   //
2965   // When schedule(runtime) is specified, the decision regarding scheduling is
2966   // deferred until run time, and the schedule and chunk size are taken from the
2967   // run-sched-var ICV. If the ICV is set to auto, the schedule is
2968   // implementation defined.
2969   //
2970   // while(__kmpc_dispatch_next(&LB, &UB)) {
2971   //   idx = LB;
2972   //   while (idx <= UB) { BODY; ++idx;
2973   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2974   //   } // inner loop
2975   // }
2976   //
2977   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2978   // When schedule(static, chunk_size) is specified, iterations are divided into
2979   // chunks of size chunk_size, and the chunks are assigned to the threads in
2980   // the team in a round-robin fashion in the order of the thread number.
2981   //
2982   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2983   //   while (idx <= UB) { BODY; ++idx; } // inner loop
2984   //   LB = LB + ST;
2985   //   UB = UB + ST;
2986   // }
2987   //
2988 
2989   const Expr *IVExpr = S.getIterationVariable();
2990   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2991   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2992 
2993   if (DynamicOrOrdered) {
       // Dispatch path: the bounds come from the caller-supplied callback.
2994     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2995         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2996     llvm::Value *LBVal = DispatchBounds.first;
2997     llvm::Value *UBVal = DispatchBounds.second;
2998     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2999                                                              LoopArgs.Chunk};
3000     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
3001                            IVSigned, Ordered, DipatchRTInputValues);
3002   } else {
       // Static path: the runtime is given the addresses of IL/LB/UB/ST.
3003     CGOpenMPRuntime::StaticRTInput StaticInit(
3004         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3005         LoopArgs.ST, LoopArgs.Chunk);
3006     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3007                          ScheduleKind, StaticInit);
3008   }
3009 
     // Callback handed to EmitOMPOuterLoop; it only emits something for ordered
     // loops.
3010   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3011                                     const unsigned IVSize,
3012                                     const bool IVSigned) {
3013     if (Ordered) {
3014       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3015                                                             IVSigned);
3016     }
3017   };
3018 
3019   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3020                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3021   OuterLoopArgs.IncExpr = S.getInc();
3022   OuterLoopArgs.Init = S.getInit();
3023   OuterLoopArgs.Cond = S.getCond();
3024   OuterLoopArgs.NextLB = S.getNextLowerBound();
3025   OuterLoopArgs.NextUB = S.getNextUpperBound();
3026   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3027                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3028 }
3029 
/// Ordered-iteration callback that emits nothing; all parameters are ignored.
/// EmitOMPDistributeOuterLoop passes it to EmitOMPOuterLoop as the
/// CodeGenOrdered argument.
3030 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3031                              const unsigned IVSize, const bool IVSigned) {}
3032 
/// Emit the outer loop of a 'distribute' loop.
///
/// Emits the distribute static-init runtime call and then delegates to
/// EmitOMPOuterLoop with DynamicOrOrdered and IsMonotonic both false and a
/// no-op ordered callback. For directives for which
/// isOpenMPLoopBoundSharingDirective() holds, the "combined" variants of the
/// loop expressions (and DistInc as the increment) are used; otherwise the
/// plain ones are used.
///
/// \param ScheduleKind The dist_schedule kind passed to the runtime init call.
/// \param S The loop directive being emitted.
/// \param LoopScope Forwarded unchanged to EmitOMPOuterLoop.
/// \param LoopArgs Addresses of LB/UB/ST/IL and the chunk value.
/// \param CodeGenLoopContent Callback that emits the loop body.
3033 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3034     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3035     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3036     const CodeGenLoopTy &CodeGenLoopContent) {
3037 
3038   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3039 
3040   // Emit outer loop.
3041   // Same behavior as an OMPForOuterLoop, except that schedule cannot be
3042   // dynamic
3043   //
3044 
3045   const Expr *IVExpr = S.getIterationVariable();
3046   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3047   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3048 
3049   CGOpenMPRuntime::StaticRTInput StaticInit(
3050       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3051       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3052   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3053 
3054   // for combined 'distribute' and 'for' the increment expression of distribute
3055   // is stored in DistInc. For 'distribute' alone, it is in Inc.
3056   Expr *IncExpr;
3057   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3058     IncExpr = S.getDistInc();
3059   else
3060     IncExpr = S.getInc();
3061 
3062   // this routine is shared by 'omp distribute parallel for' and
3063   // 'omp distribute': select the right EUB expression depending on the
3064   // directive
     // The same selection is applied below to Init, Cond, NextLB and NextUB.
3065   OMPLoopArguments OuterLoopArgs;
3066   OuterLoopArgs.LB = LoopArgs.LB;
3067   OuterLoopArgs.UB = LoopArgs.UB;
3068   OuterLoopArgs.ST = LoopArgs.ST;
3069   OuterLoopArgs.IL = LoopArgs.IL;
3070   OuterLoopArgs.Chunk = LoopArgs.Chunk;
3071   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3072                           ? S.getCombinedEnsureUpperBound()
3073                           : S.getEnsureUpperBound();
3074   OuterLoopArgs.IncExpr = IncExpr;
3075   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3076                            ? S.getCombinedInit()
3077                            : S.getInit();
3078   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3079                            ? S.getCombinedCond()
3080                            : S.getCond();
3081   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3082                              ? S.getCombinedNextLowerBound()
3083                              : S.getNextLowerBound();
3084   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3085                              ? S.getCombinedNextUpperBound()
3086                              : S.getNextUpperBound();
3087 
3088   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3089                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
3090                    emitEmptyOrdered);
3091 }
3092 
/// Emit the lower/upper bound helper variables of the inner loop and
/// initialize them from the previous (enclosing) schedule's bounds.
///
/// The PrevLowerBound/PrevUpperBound variables are loaded, converted to the
/// iteration variable's type and stored into LB and UB respectively.
///
/// \param S Must be an OMPLoopDirective (enforced by the cast below).
/// \returns the LB and UB lvalues, in that order.
3093 static std::pair<LValue, LValue>
3094 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3095                                      const OMPExecutableDirective &S) {
3096   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3097   LValue LB =
3098       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3099   LValue UB =
3100       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3101 
3102   // When composing 'distribute' with 'for' (e.g. as in 'distribute
3103   // parallel for') we need to use the 'distribute'
3104   // chunk lower and upper bounds rather than the whole loop iteration
3105   // space. These are parameters to the outlined function for 'parallel'
3106   // and we copy the bounds of the previous schedule into the
3107   // current ones.
3108   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3109   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3110   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3111       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
     // Convert from the previous bound's type to the iteration variable's type.
3112   PrevLBVal = CGF.EmitScalarConversion(
3113       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3114       LS.getIterationVariable()->getType(),
3115       LS.getPrevLowerBoundVariable()->getExprLoc());
3116   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3117       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3118   PrevUBVal = CGF.EmitScalarConversion(
3119       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3120       LS.getIterationVariable()->getType(),
3121       LS.getPrevUpperBoundVariable()->getExprLoc());
3122 
3123   CGF.EmitStoreOfScalar(PrevLBVal, LB);
3124   CGF.EmitStoreOfScalar(PrevUBVal, UB);
3125 
3126   return {LB, UB};
3127 }
3128 
3129 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3130 /// we need to use the LB and UB expressions generated by the worksharing
3131 /// code generation support, whereas in non-combined situations we would
3132 /// just emit 0 and the LastIteration expression.
3133 /// This function is necessary due to the difference of the LB and UB
3134 /// types for the RT emission routines for 'for_static_init' and
3135 /// 'for_dispatch_init'.
///
/// \param S Must be an OMPLoopDirective (enforced by the cast below).
/// \returns the values loaded from \p LB and \p UB (in that order), each read
/// as a non-volatile scalar of the iteration variable's type.
3136 static std::pair<llvm::Value *, llvm::Value *>
3137 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3138                                         const OMPExecutableDirective &S,
3139                                         Address LB, Address UB) {
3140   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3141   const Expr *IVExpr = LS.getIterationVariable();
3142   // When implementing a dynamic schedule for a 'for' combined with a
3143   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3144   // is not normalized as each team only executes its own assigned
3145   // distribute chunk.
3146   QualType IteratorTy = IVExpr->getType();
3147   llvm::Value *LBVal =
3148       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3149   llvm::Value *UBVal =
3150       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3151   return {LBVal, UBVal};
3152 }
3153 
/// Append the combined lower and upper bound values of \p S to
/// \p CapturedVars.
///
/// Each combined bound variable is loaded and integer-cast (as unsigned) to
/// CGF.SizeTy before being appended; the lower bound is pushed first, then
/// the upper bound.
///
/// \param S Must be an OMPLoopDirective (enforced by the cast below).
3154 static void emitDistributeParallelForDistributeInnerBoundParams(
3155     CodeGenFunction &CGF, const OMPExecutableDirective &S,
3156     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3157   const auto &Dir = cast<OMPLoopDirective>(S);
3158   LValue LB =
3159       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3160   llvm::Value *LBCast =
3161       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3162                                 CGF.SizeTy, /*isSigned=*/false);
3163   CapturedVars.push_back(LBCast);
3164   LValue UB =
3165       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3166 
3167   llvm::Value *UBCast =
3168       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3169                                 CGF.SizeTy, /*isSigned=*/false);
3170   CapturedVars.push_back(UBCast);
3171 }
3172 
/// Emit the inner 'parallel for' part of a combined distribute construct.
///
/// Wraps a call to EmitOMPWorksharingLoop in a lambda and hands it to
/// emitCommonOMPParallelDirective, using OMPD_for_simd for simd directives
/// and OMPD_for otherwise. \p LoopExit is not referenced in this function
/// body.
3173 static void
3174 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3175                                  const OMPLoopDirective &S,
3176                                  CodeGenFunction::JumpDest LoopExit) {
3177   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3178                                          PrePostActionTy &Action) {
3179     Action.Enter(CGF);
       // The cancel flag is only read for non-simd directives, and only from
       // the three directive classes tested below; it stays false otherwise.
3180     bool HasCancel = false;
3181     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3182       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3183         HasCancel = D->hasCancel();
3184       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3185         HasCancel = D->hasCancel();
3186       else if (const auto *D =
3187                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3188         HasCancel = D->hasCancel();
3189     }
3190     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3191                                                      HasCancel);
       // Use the previous schedule's ensure-upper-bound expression together
       // with the distribute-specific bound callbacks.
3192     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3193                                emitDistributeParallelForInnerBounds,
3194                                emitDistributeParallelForDispatchBounds);
3195   };
3196 
3197   emitCommonOMPParallelDirective(
3198       CGF, S,
3199       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3200       CGInlinedWorksharingLoop,
3201       emitDistributeParallelForDistributeInnerBoundParams);
3202 }
3203 
/// Emit code for an OMPDistributeParallelForDirective.
///
/// The directive is emitted as an inlined OMPD_distribute region whose body
/// calls EmitOMPDistributeLoop with emitInnerParallelForWhenCombined as the
/// loop-content callback and S.getDistInc() as the increment expression.
3204 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3205     const OMPDistributeParallelForDirective &S) {
3206   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3207     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3208                               S.getDistInc());
3209   };
3210   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3211   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3212 }
3213 
/// Emit code for an OMPDistributeParallelForSimdDirective.
///
/// The directive is emitted as an inlined OMPD_distribute region whose body
/// calls EmitOMPDistributeLoop with emitInnerParallelForWhenCombined as the
/// loop-content callback and S.getDistInc() as the increment expression.
3214 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3215     const OMPDistributeParallelForSimdDirective &S) {
3216   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3217     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3218                               S.getDistInc());
3219   };
3220   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3221   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3222 }
3223 
/// Emit code for an OMPDistributeSimdDirective.
///
/// The directive is emitted as an inlined OMPD_simd region whose body calls
/// EmitOMPDistributeLoop with emitOMPLoopBodyWithStopPoint as the
/// loop-content callback and S.getInc() as the increment expression.
3224 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3225     const OMPDistributeSimdDirective &S) {
3226   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3227     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3228   };
3229   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3230   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3231 }
3232 
/// Emit the outlined target function for an OMPTargetSimdDirective.
///
/// The region body is produced by emitOMPSimdRegion, and the function is
/// requested from the OpenMP runtime with IsOffloadEntry set to true. The
/// resulting function and constant are not returned to the caller; they are
/// only checked by the assertion.
///
/// \param CGM Module whose OpenMP runtime emits the outlined function.
/// \param ParentName Forwarded to emitTargetOutlinedFunction.
/// \param S The target simd directive.
3233 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3234     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3235   // The region body is the simd region of the directive.
3236   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3237     emitOMPSimdRegion(CGF, S, Action);
3238   };
3239   llvm::Function *Fn;
3240   llvm::Constant *Addr;
3241   // Emit target region as a standalone region.
3242   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3243       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3244   assert(Fn && Addr && "Target device function emission failed.");
3245 }
3246 
/// Emit code for an OMPTargetSimdDirective by passing a region body that
/// calls emitOMPSimdRegion to emitCommonOMPTargetDirective.
3247 void CodeGenFunction::EmitOMPTargetSimdDirective(
3248     const OMPTargetSimdDirective &S) {
3249   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3250     emitOMPSimdRegion(CGF, S, Action);
3251   };
3252   emitCommonOMPTargetDirective(*this, S, CodeGen);
3253 }
3254 
3255 namespace {
     /// Plain aggregate bundling a schedule clause kind with its two schedule
     /// clause modifiers. The constructor only stores its three arguments.
3256 struct ScheduleKindModifiersTy {
3257   OpenMPScheduleClauseKind Kind;
3258   OpenMPScheduleClauseModifier M1;
3259   OpenMPScheduleClauseModifier M2;
3260   ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3261                           OpenMPScheduleClauseModifier M1,
3262                           OpenMPScheduleClauseModifier M2)
3263       : Kind(Kind), M1(M1), M2(M2) {}
3264 };
3265 } // namespace
3266 
3267 bool CodeGenFunction::EmitOMPWorksharingLoop(
3268     const OMPLoopDirective &S, Expr *EUB,
3269     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3270     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3271   // Emit the loop iteration variable.
3272   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3273   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3274   EmitVarDecl(*IVDecl);
3275 
3276   // Emit the iterations count variable.
3277   // If it is not a variable, Sema decided to calculate iterations count on each
3278   // iteration (e.g., it is foldable into a constant).
3279   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3280     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3281     // Emit calculation of the iterations count.
3282     EmitIgnoredExpr(S.getCalcLastIteration());
3283   }
3284 
3285   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3286 
3287   bool HasLastprivateClause;
3288   // Check pre-condition.
3289   {
3290     OMPLoopScope PreInitScope(*this, S);
3291     // Skip the entire loop if we don't meet the precondition.
3292     // If the condition constant folds and can be elided, avoid emitting the
3293     // whole loop.
3294     bool CondConstant;
3295     llvm::BasicBlock *ContBlock = nullptr;
3296     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3297       if (!CondConstant)
3298         return false;
3299     } else {
3300       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3301       ContBlock = createBasicBlock("omp.precond.end");
3302       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3303                   getProfileCount(&S));
3304       EmitBlock(ThenBlock);
3305       incrementProfileCounter(&S);
3306     }
3307 
3308     RunCleanupsScope DoacrossCleanupScope(*this);
3309     bool Ordered = false;
3310     if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3311       if (OrderedClause->getNumForLoops())
3312         RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3313       else
3314         Ordered = true;
3315     }
3316 
3317     llvm::DenseSet<const Expr *> EmittedFinals;
3318     emitAlignedClause(*this, S);
3319     bool HasLinears = EmitOMPLinearClauseInit(S);
3320     // Emit helper vars inits.
3321 
3322     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3323     LValue LB = Bounds.first;
3324     LValue UB = Bounds.second;
3325     LValue ST =
3326         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3327     LValue IL =
3328         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3329 
3330     // Emit 'then' code.
3331     {
3332       OMPPrivateScope LoopScope(*this);
3333       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3334         // Emit implicit barrier to synchronize threads and avoid data races on
3335         // initialization of firstprivate variables and post-update of
3336         // lastprivate variables.
3337         CGM.getOpenMPRuntime().emitBarrierCall(
3338             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3339             /*ForceSimpleCall=*/true);
3340       }
3341       EmitOMPPrivateClause(S, LoopScope);
3342       CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3343           *this, S, EmitLValue(S.getIterationVariable()));
3344       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3345       EmitOMPReductionClauseInit(S, LoopScope);
3346       EmitOMPPrivateLoopCounters(S, LoopScope);
3347       EmitOMPLinearClause(S, LoopScope);
3348       (void)LoopScope.Privatize();
3349       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3350         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3351 
3352       // Detect the loop schedule kind and chunk.
3353       const Expr *ChunkExpr = nullptr;
3354       OpenMPScheduleTy ScheduleKind;
3355       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3356         ScheduleKind.Schedule = C->getScheduleKind();
3357         ScheduleKind.M1 = C->getFirstScheduleModifier();
3358         ScheduleKind.M2 = C->getSecondScheduleModifier();
3359         ChunkExpr = C->getChunkSize();
3360       } else {
3361         // Default behaviour for schedule clause.
3362         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3363             *this, S, ScheduleKind.Schedule, ChunkExpr);
3364       }
3365       bool HasChunkSizeOne = false;
3366       llvm::Value *Chunk = nullptr;
3367       if (ChunkExpr) {
3368         Chunk = EmitScalarExpr(ChunkExpr);
3369         Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3370                                      S.getIterationVariable()->getType(),
3371                                      S.getBeginLoc());
3372         Expr::EvalResult Result;
3373         if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3374           llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3375           HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3376         }
3377       }
3378       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3379       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3380       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3381       // If the static schedule kind is specified or if the ordered clause is
3382       // specified, and if no monotonic modifier is specified, the effect will
3383       // be as if the monotonic modifier was specified.
3384       bool StaticChunkedOne =
3385           RT.isStaticChunked(ScheduleKind.Schedule,
3386                              /* Chunked */ Chunk != nullptr) &&
3387           HasChunkSizeOne &&
3388           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3389       bool IsMonotonic =
3390           Ordered ||
3391           (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3392            !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3393              ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3394           ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3395           ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3396       if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3397                                  /* Chunked */ Chunk != nullptr) ||
3398            StaticChunkedOne) &&
3399           !Ordered) {
3400         JumpDest LoopExit =
3401             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3402         emitCommonSimdLoop(
3403             *this, S,
3404             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3405               if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3406                 CGF.EmitOMPSimdInit(S);
3407               } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3408                 if (C->getKind() == OMPC_ORDER_concurrent)
3409                   CGF.LoopStack.setParallel(/*Enable=*/true);
3410               }
3411             },
3412             [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3413              &S, ScheduleKind, LoopExit,
3414              &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3415               // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3416               // When no chunk_size is specified, the iteration space is divided
3417               // into chunks that are approximately equal in size, and at most
3418               // one chunk is distributed to each thread. Note that the size of
3419               // the chunks is unspecified in this case.
3420               CGOpenMPRuntime::StaticRTInput StaticInit(
3421                   IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3422                   LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3423                   StaticChunkedOne ? Chunk : nullptr);
3424               CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3425                   CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3426                   StaticInit);
3427               // UB = min(UB, GlobalUB);
3428               if (!StaticChunkedOne)
3429                 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3430               // IV = LB;
3431               CGF.EmitIgnoredExpr(S.getInit());
3432               // For unchunked static schedule generate:
3433               //
3434               // while (idx <= UB) {
3435               //   BODY;
3436               //   ++idx;
3437               // }
3438               //
3439               // For static schedule with chunk one:
3440               //
3441               // while (IV <= PrevUB) {
3442               //   BODY;
3443               //   IV += ST;
3444               // }
3445               CGF.EmitOMPInnerLoop(
3446                   S, LoopScope.requiresCleanups(),
3447                   StaticChunkedOne ? S.getCombinedParForInDistCond()
3448                                    : S.getCond(),
3449                   StaticChunkedOne ? S.getDistInc() : S.getInc(),
3450                   [&S, LoopExit](CodeGenFunction &CGF) {
3451                     emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3452                   },
3453                   [](CodeGenFunction &) {});
3454             });
3455         EmitBlock(LoopExit.getBlock());
3456         // Tell the runtime we are done.
3457         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3458           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3459                                                          S.getDirectiveKind());
3460         };
3461         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3462       } else {
3463         // Emit the outer loop, which requests its work chunk [LB..UB] from
3464         // runtime and runs the inner loop to process it.
3465         const OMPLoopArguments LoopArguments(
3466             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3467             IL.getAddress(*this), Chunk, EUB);
3468         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3469                             LoopArguments, CGDispatchBounds);
3470       }
3471       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3472         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3473           return CGF.Builder.CreateIsNotNull(
3474               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3475         });
3476       }
3477       EmitOMPReductionClauseFinal(
3478           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3479                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3480                  : /*Parallel only*/ OMPD_parallel);
3481       // Emit post-update of the reduction variables if IsLastIter != 0.
3482       emitPostUpdateForReductionClause(
3483           *this, S, [IL, &S](CodeGenFunction &CGF) {
3484             return CGF.Builder.CreateIsNotNull(
3485                 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3486           });
3487       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3488       if (HasLastprivateClause)
3489         EmitOMPLastprivateClauseFinal(
3490             S, isOpenMPSimdDirective(S.getDirectiveKind()),
3491             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3492       LoopScope.restoreMap();
3493       EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3494         return CGF.Builder.CreateIsNotNull(
3495             CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3496       });
3497     }
3498     DoacrossCleanupScope.ForceCleanup();
3499     // We're now done with the loop, so jump to the continuation block.
3500     if (ContBlock) {
3501       EmitBranch(ContBlock);
3502       EmitBlock(ContBlock, /*IsFinished=*/true);
3503     }
3504   }
3505   return HasLastprivateClause;
3506 }
3507 
3508 /// The following two functions generate expressions for the loop lower
3509 /// and upper bounds in case of static and dynamic (dispatch) schedule
3510 /// of the associated 'for' or 'distribute' loop.
3511 static std::pair<LValue, LValue>
3512 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3513   const auto &LS = cast<OMPLoopDirective>(S);
3514   LValue LB =
3515       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3516   LValue UB =
3517       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3518   return {LB, UB};
3519 }
3520 
3521 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3522 /// consider the lower and upper bound expressions generated by the
3523 /// worksharing loop support, but we use 0 and the iteration space size as
3524 /// constants
3525 static std::pair<llvm::Value *, llvm::Value *>
3526 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3527                           Address LB, Address UB) {
3528   const auto &LS = cast<OMPLoopDirective>(S);
3529   const Expr *IVExpr = LS.getIterationVariable();
3530   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3531   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3532   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3533   return {LBVal, UBVal};
3534 }
3535 
3536 /// Emits internal temp array declarations for the directive with inscan
3537 /// reductions.
3538 /// The code is the following:
3539 /// \code
3540 /// size num_iters = <num_iters>;
3541 /// <type> buffer[num_iters];
3542 /// \endcode
3543 static void emitScanBasedDirectiveDecls(
3544     CodeGenFunction &CGF, const OMPLoopDirective &S,
3545     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3546   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3547       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3548   SmallVector<const Expr *, 4> Shareds;
3549   SmallVector<const Expr *, 4> Privates;
3550   SmallVector<const Expr *, 4> ReductionOps;
3551   SmallVector<const Expr *, 4> CopyArrayTemps;
3552   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3553     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3554            "Only inscan reductions are expected.");
3555     Shareds.append(C->varlist_begin(), C->varlist_end());
3556     Privates.append(C->privates().begin(), C->privates().end());
3557     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3558     CopyArrayTemps.append(C->copy_array_temps().begin(),
3559                           C->copy_array_temps().end());
3560   }
3561   {
3562     // Emit buffers for each reduction variables.
3563     // ReductionCodeGen is required to emit correctly the code for array
3564     // reductions.
3565     ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3566     unsigned Count = 0;
3567     auto *ITA = CopyArrayTemps.begin();
3568     for (const Expr *IRef : Privates) {
3569       const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3570       // Emit variably modified arrays, used for arrays/array sections
3571       // reductions.
3572       if (PrivateVD->getType()->isVariablyModifiedType()) {
3573         RedCG.emitSharedOrigLValue(CGF, Count);
3574         RedCG.emitAggregateType(CGF, Count);
3575       }
3576       CodeGenFunction::OpaqueValueMapping DimMapping(
3577           CGF,
3578           cast<OpaqueValueExpr>(
3579               cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3580                   ->getSizeExpr()),
3581           RValue::get(OMPScanNumIterations));
3582       // Emit temp buffer.
3583       CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3584       ++ITA;
3585       ++Count;
3586     }
3587   }
3588 }
3589 
3590 /// Copies final inscan reductions values to the original variables.
3591 /// The code is the following:
3592 /// \code
3593 /// <orig_var> = buffer[num_iters-1];
3594 /// \endcode
3595 static void emitScanBasedDirectiveFinals(
3596     CodeGenFunction &CGF, const OMPLoopDirective &S,
3597     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3598   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3599       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3600   SmallVector<const Expr *, 4> Shareds;
3601   SmallVector<const Expr *, 4> LHSs;
3602   SmallVector<const Expr *, 4> RHSs;
3603   SmallVector<const Expr *, 4> Privates;
3604   SmallVector<const Expr *, 4> CopyOps;
3605   SmallVector<const Expr *, 4> CopyArrayElems;
3606   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3607     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3608            "Only inscan reductions are expected.");
3609     Shareds.append(C->varlist_begin(), C->varlist_end());
3610     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3611     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3612     Privates.append(C->privates().begin(), C->privates().end());
3613     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3614     CopyArrayElems.append(C->copy_array_elems().begin(),
3615                           C->copy_array_elems().end());
3616   }
3617   // Create temp var and copy LHS value to this temp value.
3618   // LHS = TMP[LastIter];
3619   llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3620       OMPScanNumIterations,
3621       llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3622   for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3623     const Expr *PrivateExpr = Privates[I];
3624     const Expr *OrigExpr = Shareds[I];
3625     const Expr *CopyArrayElem = CopyArrayElems[I];
3626     CodeGenFunction::OpaqueValueMapping IdxMapping(
3627         CGF,
3628         cast<OpaqueValueExpr>(
3629             cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3630         RValue::get(OMPLast));
3631     LValue DestLVal = CGF.EmitLValue(OrigExpr);
3632     LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3633     CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
3634                     SrcLVal.getAddress(CGF),
3635                     cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3636                     cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
3637                     CopyOps[I]);
3638   }
3639 }
3640 
3641 /// Emits the code for the directive with inscan reductions.
3642 /// The code is the following:
3643 /// \code
3644 /// #pragma omp ...
3645 /// for (i: 0..<num_iters>) {
3646 ///   <input phase>;
3647 ///   buffer[i] = red;
3648 /// }
3649 /// #pragma omp master // in parallel region
3650 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3651 /// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3652 ///   buffer[i] op= buffer[i-pow(2,k)];
3653 /// #pragma omp barrier // in parallel region
3654 /// #pragma omp ...
3655 /// for (0..<num_iters>) {
3656 ///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3657 ///   <scan phase>;
3658 /// }
3659 /// \endcode
3660 static void emitScanBasedDirective(
3661     CodeGenFunction &CGF, const OMPLoopDirective &S,
3662     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3663     llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3664     llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3665   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3666       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3667   SmallVector<const Expr *, 4> Privates;
3668   SmallVector<const Expr *, 4> ReductionOps;
3669   SmallVector<const Expr *, 4> LHSs;
3670   SmallVector<const Expr *, 4> RHSs;
3671   SmallVector<const Expr *, 4> CopyArrayElems;
3672   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3673     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3674            "Only inscan reductions are expected.");
3675     Privates.append(C->privates().begin(), C->privates().end());
3676     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3677     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3678     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3679     CopyArrayElems.append(C->copy_array_elems().begin(),
3680                           C->copy_array_elems().end());
3681   }
3682   CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3683   {
3684     // Emit loop with input phase:
3685     // #pragma omp ...
3686     // for (i: 0..<num_iters>) {
3687     //   <input phase>;
3688     //   buffer[i] = red;
3689     // }
3690     CGF.OMPFirstScanLoop = true;
3691     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3692     FirstGen(CGF);
3693   }
3694   // #pragma omp barrier // in parallel region
3695   auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3696                     &ReductionOps,
3697                     &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3698     Action.Enter(CGF);
3699     // Emit prefix reduction:
3700     // #pragma omp master // in parallel region
3701     // for (int k = 0; k <= ceil(log2(n)); ++k)
3702     llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3703     llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3704     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3705     llvm::Function *F =
3706         CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3707     llvm::Value *Arg =
3708         CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3709     llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3710     F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3711     LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3712     LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3713     llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3714         OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3715     auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3716     CGF.EmitBlock(LoopBB);
3717     auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3718     // size pow2k = 1;
3719     auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3720     Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3721     Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3722     // for (size i = n - 1; i >= 2 ^ k; --i)
3723     //   tmp[i] op= tmp[i-pow2k];
3724     llvm::BasicBlock *InnerLoopBB =
3725         CGF.createBasicBlock("omp.inner.log.scan.body");
3726     llvm::BasicBlock *InnerExitBB =
3727         CGF.createBasicBlock("omp.inner.log.scan.exit");
3728     llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3729     CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3730     CGF.EmitBlock(InnerLoopBB);
3731     auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3732     IVal->addIncoming(NMin1, LoopBB);
3733     {
3734       CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3735       auto *ILHS = LHSs.begin();
3736       auto *IRHS = RHSs.begin();
3737       for (const Expr *CopyArrayElem : CopyArrayElems) {
3738         const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3739         const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3740         Address LHSAddr = Address::invalid();
3741         {
3742           CodeGenFunction::OpaqueValueMapping IdxMapping(
3743               CGF,
3744               cast<OpaqueValueExpr>(
3745                   cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3746               RValue::get(IVal));
3747           LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3748         }
3749         PrivScope.addPrivate(LHSVD, LHSAddr);
3750         Address RHSAddr = Address::invalid();
3751         {
3752           llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3753           CodeGenFunction::OpaqueValueMapping IdxMapping(
3754               CGF,
3755               cast<OpaqueValueExpr>(
3756                   cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3757               RValue::get(OffsetIVal));
3758           RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3759         }
3760         PrivScope.addPrivate(RHSVD, RHSAddr);
3761         ++ILHS;
3762         ++IRHS;
3763       }
3764       PrivScope.Privatize();
3765       CGF.CGM.getOpenMPRuntime().emitReduction(
3766           CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3767           {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3768     }
3769     llvm::Value *NextIVal =
3770         CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3771     IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3772     CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3773     CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3774     CGF.EmitBlock(InnerExitBB);
3775     llvm::Value *Next =
3776         CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3777     Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3778     // pow2k <<= 1;
3779     llvm::Value *NextPow2K =
3780         CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3781     Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3782     llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3783     CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3784     auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3785     CGF.EmitBlock(ExitBB);
3786   };
3787   if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3788     CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3789     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3790         CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3791         /*ForceSimpleCall=*/true);
3792   } else {
3793     RegionCodeGenTy RCG(CodeGen);
3794     RCG(CGF);
3795   }
3796 
3797   CGF.OMPFirstScanLoop = false;
3798   SecondGen(CGF);
3799 }
3800 
3801 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3802                                      const OMPLoopDirective &S,
3803                                      bool HasCancel) {
3804   bool HasLastprivates;
3805   if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3806                    [](const OMPReductionClause *C) {
3807                      return C->getModifier() == OMPC_REDUCTION_inscan;
3808                    })) {
3809     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3810       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3811       OMPLoopScope LoopScope(CGF, S);
3812       return CGF.EmitScalarExpr(S.getNumIterations());
3813     };
3814     const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3815       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3816           CGF, S.getDirectiveKind(), HasCancel);
3817       (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3818                                        emitForLoopBounds,
3819                                        emitDispatchForLoopBounds);
3820       // Emit an implicit barrier at the end.
3821       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3822                                                  OMPD_for);
3823     };
3824     const auto &&SecondGen = [&S, HasCancel,
3825                               &HasLastprivates](CodeGenFunction &CGF) {
3826       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3827           CGF, S.getDirectiveKind(), HasCancel);
3828       HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3829                                                    emitForLoopBounds,
3830                                                    emitDispatchForLoopBounds);
3831     };
3832     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3833       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3834     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3835     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3836       emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
3837   } else {
3838     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3839                                                      HasCancel);
3840     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3841                                                  emitForLoopBounds,
3842                                                  emitDispatchForLoopBounds);
3843   }
3844   return HasLastprivates;
3845 }
3846 
3847 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3848   if (S.hasCancel())
3849     return false;
3850   for (OMPClause *C : S.clauses()) {
3851     if (isa<OMPNowaitClause>(C))
3852       continue;
3853 
3854     if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3855       if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3856         return false;
3857       if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3858         return false;
3859       switch (SC->getScheduleKind()) {
3860       case OMPC_SCHEDULE_auto:
3861       case OMPC_SCHEDULE_dynamic:
3862       case OMPC_SCHEDULE_runtime:
3863       case OMPC_SCHEDULE_guided:
3864       case OMPC_SCHEDULE_static:
3865         continue;
3866       case OMPC_SCHEDULE_unknown:
3867         return false;
3868       }
3869     }
3870 
3871     return false;
3872   }
3873 
3874   return true;
3875 }
3876 
3877 static llvm::omp::ScheduleKind
3878 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3879   switch (ScheduleClauseKind) {
3880   case OMPC_SCHEDULE_unknown:
3881     return llvm::omp::OMP_SCHEDULE_Default;
3882   case OMPC_SCHEDULE_auto:
3883     return llvm::omp::OMP_SCHEDULE_Auto;
3884   case OMPC_SCHEDULE_dynamic:
3885     return llvm::omp::OMP_SCHEDULE_Dynamic;
3886   case OMPC_SCHEDULE_guided:
3887     return llvm::omp::OMP_SCHEDULE_Guided;
3888   case OMPC_SCHEDULE_runtime:
3889     return llvm::omp::OMP_SCHEDULE_Runtime;
3890   case OMPC_SCHEDULE_static:
3891     return llvm::omp::OMP_SCHEDULE_Static;
3892   }
3893   llvm_unreachable("Unhandled schedule kind");
3894 }
3895 
3896 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3897   bool HasLastprivates = false;
3898   bool UseOMPIRBuilder =
3899       CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3900   auto &&CodeGen = [this, &S, &HasLastprivates,
3901                     UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3902     // Use the OpenMPIRBuilder if enabled.
3903     if (UseOMPIRBuilder) {
3904       bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3905 
3906       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
3907       llvm::Value *ChunkSize = nullptr;
3908       if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
3909         SchedKind =
3910             convertClauseKindToSchedKind(SchedClause->getScheduleKind());
3911         if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
3912           ChunkSize = EmitScalarExpr(ChunkSizeExpr);
3913       }
3914 
3915       // Emit the associated statement and get its loop representation.
3916       const Stmt *Inner = S.getRawStmt();
3917       llvm::CanonicalLoopInfo *CLI =
3918           EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3919 
3920       llvm::OpenMPIRBuilder &OMPBuilder =
3921           CGM.getOpenMPRuntime().getOMPBuilder();
3922       llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3923           AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3924       OMPBuilder.applyWorkshareLoop(
3925           Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
3926           SchedKind, ChunkSize, /*HasSimdModifier=*/false,
3927           /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3928           /*HasOrderedClause=*/false);
3929       return;
3930     }
3931 
3932     HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3933   };
3934   {
3935     auto LPCRegion =
3936         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3937     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3938     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3939                                                 S.hasCancel());
3940   }
3941 
3942   if (!UseOMPIRBuilder) {
3943     // Emit an implicit barrier at the end.
3944     if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3945       CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3946   }
3947   // Check for outer lastprivate conditional update.
3948   checkForLastprivateConditionalUpdate(*this, S);
3949 }
3950 
3951 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3952   bool HasLastprivates = false;
3953   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3954                                           PrePostActionTy &) {
3955     HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3956   };
3957   {
3958     auto LPCRegion =
3959         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3960     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3961     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3962   }
3963 
3964   // Emit an implicit barrier at the end.
3965   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3966     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3967   // Check for outer lastprivate conditional update.
3968   checkForLastprivateConditionalUpdate(*this, S);
3969 }
3970 
3971 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3972                                 const Twine &Name,
3973                                 llvm::Value *Init = nullptr) {
3974   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3975   if (Init)
3976     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3977   return LVal;
3978 }
3979 
3980 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3981   const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3982   const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3983   bool HasLastprivates = false;
3984   auto &&CodeGen = [&S, CapturedStmt, CS,
3985                     &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3986     const ASTContext &C = CGF.getContext();
3987     QualType KmpInt32Ty =
3988         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3989     // Emit helper vars inits.
3990     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3991                                   CGF.Builder.getInt32(0));
3992     llvm::ConstantInt *GlobalUBVal = CS != nullptr
3993                                          ? CGF.Builder.getInt32(CS->size() - 1)
3994                                          : CGF.Builder.getInt32(0);
3995     LValue UB =
3996         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3997     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3998                                   CGF.Builder.getInt32(1));
3999     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
4000                                   CGF.Builder.getInt32(0));
4001     // Loop counter.
4002     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
4003     OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4004     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4005     OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4006     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4007     // Generate condition for loop.
4008     BinaryOperator *Cond = BinaryOperator::Create(
4009         C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
4010         S.getBeginLoc(), FPOptionsOverride());
4011     // Increment for loop counter.
4012     UnaryOperator *Inc = UnaryOperator::Create(
4013         C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
4014         S.getBeginLoc(), true, FPOptionsOverride());
4015     auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4016       // Iterate through all sections and emit a switch construct:
4017       // switch (IV) {
4018       //   case 0:
4019       //     <SectionStmt[0]>;
4020       //     break;
4021       // ...
4022       //   case <NumSection> - 1:
4023       //     <SectionStmt[<NumSection> - 1]>;
4024       //     break;
4025       // }
4026       // .omp.sections.exit:
4027       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
4028       llvm::SwitchInst *SwitchStmt =
4029           CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
4030                                    ExitBB, CS == nullptr ? 1 : CS->size());
4031       if (CS) {
4032         unsigned CaseNumber = 0;
4033         for (const Stmt *SubStmt : CS->children()) {
4034           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
4035           CGF.EmitBlock(CaseBB);
4036           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
4037           CGF.EmitStmt(SubStmt);
4038           CGF.EmitBranch(ExitBB);
4039           ++CaseNumber;
4040         }
4041       } else {
4042         llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
4043         CGF.EmitBlock(CaseBB);
4044         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
4045         CGF.EmitStmt(CapturedStmt);
4046         CGF.EmitBranch(ExitBB);
4047       }
4048       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
4049     };
4050 
4051     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4052     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
4053       // Emit implicit barrier to synchronize threads and avoid data races on
4054       // initialization of firstprivate variables and post-update of lastprivate
4055       // variables.
4056       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4057           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4058           /*ForceSimpleCall=*/true);
4059     }
4060     CGF.EmitOMPPrivateClause(S, LoopScope);
4061     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4062     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4063     CGF.EmitOMPReductionClauseInit(S, LoopScope);
4064     (void)LoopScope.Privatize();
4065     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4066       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4067 
4068     // Emit static non-chunked loop.
4069     OpenMPScheduleTy ScheduleKind;
4070     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4071     CGOpenMPRuntime::StaticRTInput StaticInit(
4072         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
4073         LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
4074     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
4075         CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
4076     // UB = min(UB, GlobalUB);
4077     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
4078     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4079         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
4080     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
4081     // IV = LB;
4082     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
4083     // while (idx <= UB) { BODY; ++idx; }
4084     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
4085                          [](CodeGenFunction &) {});
4086     // Tell the runtime we are done.
4087     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4088       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
4089                                                      S.getDirectiveKind());
4090     };
4091     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
4092     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4093     // Emit post-update of the reduction variables if IsLastIter != 0.
4094     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
4095       return CGF.Builder.CreateIsNotNull(
4096           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4097     });
4098 
4099     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4100     if (HasLastprivates)
4101       CGF.EmitOMPLastprivateClauseFinal(
4102           S, /*NoFinals=*/false,
4103           CGF.Builder.CreateIsNotNull(
4104               CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
4105   };
4106 
4107   bool HasCancel = false;
4108   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
4109     HasCancel = OSD->hasCancel();
4110   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
4111     HasCancel = OPSD->hasCancel();
4112   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
4113   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
4114                                               HasCancel);
4115   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4116   // clause. Otherwise the barrier will be generated by the codegen for the
4117   // directive.
4118   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4119     // Emit implicit barrier to synchronize threads and avoid data races on
4120     // initialization of firstprivate variables.
4121     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4122                                            OMPD_unknown);
4123   }
4124 }
4125 
4126 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4127   if (CGM.getLangOpts().OpenMPIRBuilder) {
4128     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4129     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4130     using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4131 
4132     auto FiniCB = [this](InsertPointTy IP) {
4133       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4134     };
4135 
4136     const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4137     const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4138     const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4139     llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4140     if (CS) {
4141       for (const Stmt *SubStmt : CS->children()) {
4142         auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4143                                          InsertPointTy CodeGenIP) {
4144           OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4145               *this, SubStmt, AllocaIP, CodeGenIP, "section");
4146         };
4147         SectionCBVector.push_back(SectionCB);
4148       }
4149     } else {
4150       auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4151                                             InsertPointTy CodeGenIP) {
4152         OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4153             *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
4154       };
4155       SectionCBVector.push_back(SectionCB);
4156     }
4157 
4158     // Privatization callback that performs appropriate action for
4159     // shared/private/firstprivate/lastprivate/copyin/... variables.
4160     //
4161     // TODO: This defaults to shared right now.
4162     auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4163                      llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4164       // The next line is appropriate only for variables (Val) with the
4165       // data-sharing attribute "shared".
4166       ReplVal = &Val;
4167 
4168       return CodeGenIP;
4169     };
4170 
4171     CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4172     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4173     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4174         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4175     Builder.restoreIP(OMPBuilder.createSections(
4176         Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
4177         S.getSingleClause<OMPNowaitClause>()));
4178     return;
4179   }
4180   {
4181     auto LPCRegion =
4182         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4183     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4184     EmitSections(S);
4185   }
4186   // Emit an implicit barrier at the end.
4187   if (!S.getSingleClause<OMPNowaitClause>()) {
4188     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4189                                            OMPD_sections);
4190   }
4191   // Check for outer lastprivate conditional update.
4192   checkForLastprivateConditionalUpdate(*this, S);
4193 }
4194 
4195 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4196   if (CGM.getLangOpts().OpenMPIRBuilder) {
4197     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4198     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4199 
4200     const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4201     auto FiniCB = [this](InsertPointTy IP) {
4202       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4203     };
4204 
4205     auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4206                                                    InsertPointTy CodeGenIP) {
4207       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4208           *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4209     };
4210 
4211     LexicalScope Scope(*this, S.getSourceRange());
4212     EmitStopPoint(&S);
4213     Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4214 
4215     return;
4216   }
4217   LexicalScope Scope(*this, S.getSourceRange());
4218   EmitStopPoint(&S);
4219   EmitStmt(S.getAssociatedStmt());
4220 }
4221 
4222 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4223   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4224   llvm::SmallVector<const Expr *, 8> DestExprs;
4225   llvm::SmallVector<const Expr *, 8> SrcExprs;
4226   llvm::SmallVector<const Expr *, 8> AssignmentOps;
4227   // Check if there are any 'copyprivate' clauses associated with this
4228   // 'single' construct.
4229   // Build a list of copyprivate variables along with helper expressions
4230   // (<source>, <destination>, <destination>=<source> expressions)
4231   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4232     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4233     DestExprs.append(C->destination_exprs().begin(),
4234                      C->destination_exprs().end());
4235     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4236     AssignmentOps.append(C->assignment_ops().begin(),
4237                          C->assignment_ops().end());
4238   }
4239   // Emit code for 'single' region along with 'copyprivate' clauses
4240   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4241     Action.Enter(CGF);
4242     OMPPrivateScope SingleScope(CGF);
4243     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4244     CGF.EmitOMPPrivateClause(S, SingleScope);
4245     (void)SingleScope.Privatize();
4246     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4247   };
4248   {
4249     auto LPCRegion =
4250         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4251     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4252     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4253                                             CopyprivateVars, DestExprs,
4254                                             SrcExprs, AssignmentOps);
4255   }
4256   // Emit an implicit barrier at the end (to avoid data race on firstprivate
4257   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4258   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4259     CGM.getOpenMPRuntime().emitBarrierCall(
4260         *this, S.getBeginLoc(),
4261         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4262   }
4263   // Check for outer lastprivate conditional update.
4264   checkForLastprivateConditionalUpdate(*this, S);
4265 }
4266 
4267 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4268   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4269     Action.Enter(CGF);
4270     CGF.EmitStmt(S.getRawStmt());
4271   };
4272   CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4273 }
4274 
4275 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4276   if (CGM.getLangOpts().OpenMPIRBuilder) {
4277     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4278     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4279 
4280     const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4281 
4282     auto FiniCB = [this](InsertPointTy IP) {
4283       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4284     };
4285 
4286     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4287                                                   InsertPointTy CodeGenIP) {
4288       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4289           *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
4290     };
4291 
4292     LexicalScope Scope(*this, S.getSourceRange());
4293     EmitStopPoint(&S);
4294     Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4295 
4296     return;
4297   }
4298   LexicalScope Scope(*this, S.getSourceRange());
4299   EmitStopPoint(&S);
4300   emitMaster(*this, S);
4301 }
4302 
4303 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4304   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4305     Action.Enter(CGF);
4306     CGF.EmitStmt(S.getRawStmt());
4307   };
4308   Expr *Filter = nullptr;
4309   if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4310     Filter = FilterClause->getThreadID();
4311   CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4312                                               Filter);
4313 }
4314 
4315 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4316   if (CGM.getLangOpts().OpenMPIRBuilder) {
4317     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4318     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4319 
4320     const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4321     const Expr *Filter = nullptr;
4322     if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4323       Filter = FilterClause->getThreadID();
4324     llvm::Value *FilterVal = Filter
4325                                  ? EmitScalarExpr(Filter, CGM.Int32Ty)
4326                                  : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4327 
4328     auto FiniCB = [this](InsertPointTy IP) {
4329       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4330     };
4331 
4332     auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4333                                                   InsertPointTy CodeGenIP) {
4334       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4335           *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
4336     };
4337 
4338     LexicalScope Scope(*this, S.getSourceRange());
4339     EmitStopPoint(&S);
4340     Builder.restoreIP(
4341         OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4342 
4343     return;
4344   }
4345   LexicalScope Scope(*this, S.getSourceRange());
4346   EmitStopPoint(&S);
4347   emitMasked(*this, S);
4348 }
4349 
4350 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4351   if (CGM.getLangOpts().OpenMPIRBuilder) {
4352     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4353     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4354 
4355     const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4356     const Expr *Hint = nullptr;
4357     if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4358       Hint = HintClause->getHint();
4359 
4360     // TODO: This is slightly different from what's currently being done in
4361     // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4362     // about typing is final.
4363     llvm::Value *HintInst = nullptr;
4364     if (Hint)
4365       HintInst =
4366           Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4367 
4368     auto FiniCB = [this](InsertPointTy IP) {
4369       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4370     };
4371 
4372     auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4373                                                     InsertPointTy CodeGenIP) {
4374       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4375           *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
4376     };
4377 
4378     LexicalScope Scope(*this, S.getSourceRange());
4379     EmitStopPoint(&S);
4380     Builder.restoreIP(OMPBuilder.createCritical(
4381         Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4382         HintInst));
4383 
4384     return;
4385   }
4386 
4387   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4388     Action.Enter(CGF);
4389     CGF.EmitStmt(S.getAssociatedStmt());
4390   };
4391   const Expr *Hint = nullptr;
4392   if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4393     Hint = HintClause->getHint();
4394   LexicalScope Scope(*this, S.getSourceRange());
4395   EmitStopPoint(&S);
4396   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4397                                             S.getDirectiveName().getAsString(),
4398                                             CodeGen, S.getBeginLoc(), Hint);
4399 }
4400 
4401 void CodeGenFunction::EmitOMPParallelForDirective(
4402     const OMPParallelForDirective &S) {
4403   // Emit directive as a combined directive that consists of two implicit
4404   // directives: 'parallel' with 'for' directive.
4405   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4406     Action.Enter(CGF);
4407     emitOMPCopyinClause(CGF, S);
4408     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4409   };
4410   {
4411     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4412       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4413       CGCapturedStmtInfo CGSI(CR_OpenMP);
4414       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4415       OMPLoopScope LoopScope(CGF, S);
4416       return CGF.EmitScalarExpr(S.getNumIterations());
4417     };
4418     bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4419                      [](const OMPReductionClause *C) {
4420                        return C->getModifier() == OMPC_REDUCTION_inscan;
4421                      });
4422     if (IsInscan)
4423       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4424     auto LPCRegion =
4425         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4426     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4427                                    emitEmptyBoundParameters);
4428     if (IsInscan)
4429       emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4430   }
4431   // Check for outer lastprivate conditional update.
4432   checkForLastprivateConditionalUpdate(*this, S);
4433 }
4434 
4435 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4436     const OMPParallelForSimdDirective &S) {
4437   // Emit directive as a combined directive that consists of two implicit
4438   // directives: 'parallel' with 'for' directive.
4439   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4440     Action.Enter(CGF);
4441     emitOMPCopyinClause(CGF, S);
4442     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4443   };
4444   {
4445     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4446       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4447       CGCapturedStmtInfo CGSI(CR_OpenMP);
4448       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4449       OMPLoopScope LoopScope(CGF, S);
4450       return CGF.EmitScalarExpr(S.getNumIterations());
4451     };
4452     bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4453                      [](const OMPReductionClause *C) {
4454                        return C->getModifier() == OMPC_REDUCTION_inscan;
4455                      });
4456     if (IsInscan)
4457       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4458     auto LPCRegion =
4459         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4460     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4461                                    emitEmptyBoundParameters);
4462     if (IsInscan)
4463       emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4464   }
4465   // Check for outer lastprivate conditional update.
4466   checkForLastprivateConditionalUpdate(*this, S);
4467 }
4468 
4469 void CodeGenFunction::EmitOMPParallelMasterDirective(
4470     const OMPParallelMasterDirective &S) {
4471   // Emit directive as a combined directive that consists of two implicit
4472   // directives: 'parallel' with 'master' directive.
4473   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4474     Action.Enter(CGF);
4475     OMPPrivateScope PrivateScope(CGF);
4476     emitOMPCopyinClause(CGF, S);
4477     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4478     CGF.EmitOMPPrivateClause(S, PrivateScope);
4479     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4480     (void)PrivateScope.Privatize();
4481     emitMaster(CGF, S);
4482     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4483   };
4484   {
4485     auto LPCRegion =
4486         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4487     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4488                                    emitEmptyBoundParameters);
4489     emitPostUpdateForReductionClause(*this, S,
4490                                      [](CodeGenFunction &) { return nullptr; });
4491   }
4492   // Check for outer lastprivate conditional update.
4493   checkForLastprivateConditionalUpdate(*this, S);
4494 }
4495 
4496 void CodeGenFunction::EmitOMPParallelMaskedDirective(
4497     const OMPParallelMaskedDirective &S) {
4498   // Emit directive as a combined directive that consists of two implicit
4499   // directives: 'parallel' with 'masked' directive.
4500   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4501     Action.Enter(CGF);
4502     OMPPrivateScope PrivateScope(CGF);
4503     emitOMPCopyinClause(CGF, S);
4504     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4505     CGF.EmitOMPPrivateClause(S, PrivateScope);
4506     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4507     (void)PrivateScope.Privatize();
4508     emitMasked(CGF, S);
4509     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4510   };
4511   {
4512     auto LPCRegion =
4513         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4514     emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
4515                                    emitEmptyBoundParameters);
4516     emitPostUpdateForReductionClause(*this, S,
4517                                      [](CodeGenFunction &) { return nullptr; });
4518   }
4519   // Check for outer lastprivate conditional update.
4520   checkForLastprivateConditionalUpdate(*this, S);
4521 }
4522 
4523 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4524     const OMPParallelSectionsDirective &S) {
4525   // Emit directive as a combined directive that consists of two implicit
4526   // directives: 'parallel' with 'sections' directive.
4527   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4528     Action.Enter(CGF);
4529     emitOMPCopyinClause(CGF, S);
4530     CGF.EmitSections(S);
4531   };
4532   {
4533     auto LPCRegion =
4534         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4535     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4536                                    emitEmptyBoundParameters);
4537   }
4538   // Check for outer lastprivate conditional update.
4539   checkForLastprivateConditionalUpdate(*this, S);
4540 }
4541 
4542 namespace {
4543 /// Get the list of variables declared in the context of the untied tasks.
4544 class CheckVarsEscapingUntiedTaskDeclContext final
4545     : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4546   llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4547 
4548 public:
4549   explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4550   virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4551   void VisitDeclStmt(const DeclStmt *S) {
4552     if (!S)
4553       return;
4554     // Need to privatize only local vars, static locals can be processed as is.
4555     for (const Decl *D : S->decls()) {
4556       if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4557         if (VD->hasLocalStorage())
4558           PrivateDecls.push_back(VD);
4559     }
4560   }
4561   void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4562   void VisitCapturedStmt(const CapturedStmt *) {}
4563   void VisitLambdaExpr(const LambdaExpr *) {}
4564   void VisitBlockExpr(const BlockExpr *) {}
4565   void VisitStmt(const Stmt *S) {
4566     if (!S)
4567       return;
4568     for (const Stmt *Child : S->children())
4569       if (Child)
4570         Visit(Child);
4571   }
4572 
4573   /// Swaps list of vars with the provided one.
4574   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4575 };
4576 } // anonymous namespace
4577 
4578 static void buildDependences(const OMPExecutableDirective &S,
4579                              OMPTaskDataTy &Data) {
4580 
4581   // First look for 'omp_all_memory' and add this first.
4582   bool OmpAllMemory = false;
4583   if (llvm::any_of(
4584           S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4585             return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4586                    C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4587           })) {
4588     OmpAllMemory = true;
4589     // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4590     // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4591     // simplify.
4592     OMPTaskDataTy::DependData &DD =
4593         Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4594                                       /*IteratorExpr=*/nullptr);
4595     // Add a nullptr Expr to simplify the codegen in emitDependData.
4596     DD.DepExprs.push_back(nullptr);
4597   }
4598   // Add remaining dependences skipping any 'out' or 'inout' if they are
4599   // overridden by 'omp_all_memory'.
4600   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4601     OpenMPDependClauseKind Kind = C->getDependencyKind();
4602     if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4603       continue;
4604     if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4605       continue;
4606     OMPTaskDataTy::DependData &DD =
4607         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4608     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4609   }
4610 }
4611 
4612 void CodeGenFunction::EmitOMPTaskBasedDirective(
4613     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4614     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4615     OMPTaskDataTy &Data) {
4616   // Emit outlined function for task construct.
4617   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4618   auto I = CS->getCapturedDecl()->param_begin();
4619   auto PartId = std::next(I);
4620   auto TaskT = std::next(I, 4);
4621   // Check if the task is final
4622   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4623     // If the condition constant folds and can be elided, try to avoid emitting
4624     // the condition and the dead arm of the if/else.
4625     const Expr *Cond = Clause->getCondition();
4626     bool CondConstant;
4627     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4628       Data.Final.setInt(CondConstant);
4629     else
4630       Data.Final.setPointer(EvaluateExprAsBool(Cond));
4631   } else {
4632     // By default the task is not final.
4633     Data.Final.setInt(/*IntVal=*/false);
4634   }
4635   // Check if the task has 'priority' clause.
4636   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4637     const Expr *Prio = Clause->getPriority();
4638     Data.Priority.setInt(/*IntVal=*/true);
4639     Data.Priority.setPointer(EmitScalarConversion(
4640         EmitScalarExpr(Prio), Prio->getType(),
4641         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4642         Prio->getExprLoc()));
4643   }
4644   // The first function argument for tasks is a thread id, the second one is a
4645   // part id (0 for tied tasks, >=0 for untied task).
4646   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4647   // Get list of private variables.
4648   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4649     auto IRef = C->varlist_begin();
4650     for (const Expr *IInit : C->private_copies()) {
4651       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4652       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4653         Data.PrivateVars.push_back(*IRef);
4654         Data.PrivateCopies.push_back(IInit);
4655       }
4656       ++IRef;
4657     }
4658   }
4659   EmittedAsPrivate.clear();
4660   // Get list of firstprivate variables.
4661   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4662     auto IRef = C->varlist_begin();
4663     auto IElemInitRef = C->inits().begin();
4664     for (const Expr *IInit : C->private_copies()) {
4665       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4666       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4667         Data.FirstprivateVars.push_back(*IRef);
4668         Data.FirstprivateCopies.push_back(IInit);
4669         Data.FirstprivateInits.push_back(*IElemInitRef);
4670       }
4671       ++IRef;
4672       ++IElemInitRef;
4673     }
4674   }
4675   // Get list of lastprivate variables (for taskloops).
4676   llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4677   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4678     auto IRef = C->varlist_begin();
4679     auto ID = C->destination_exprs().begin();
4680     for (const Expr *IInit : C->private_copies()) {
4681       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4682       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4683         Data.LastprivateVars.push_back(*IRef);
4684         Data.LastprivateCopies.push_back(IInit);
4685       }
4686       LastprivateDstsOrigs.insert(
4687           std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4688                          cast<DeclRefExpr>(*IRef)));
4689       ++IRef;
4690       ++ID;
4691     }
4692   }
4693   SmallVector<const Expr *, 4> LHSs;
4694   SmallVector<const Expr *, 4> RHSs;
4695   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4696     Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4697     Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4698     Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4699     Data.ReductionOps.append(C->reduction_ops().begin(),
4700                              C->reduction_ops().end());
4701     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4702     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4703   }
4704   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4705       *this, S.getBeginLoc(), LHSs, RHSs, Data);
4706   // Build list of dependences.
4707   buildDependences(S, Data);
4708   // Get list of local vars for untied tasks.
4709   if (!Data.Tied) {
4710     CheckVarsEscapingUntiedTaskDeclContext Checker;
4711     Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4712     Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4713                               Checker.getPrivateDecls().end());
4714   }
4715   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4716                     CapturedRegion](CodeGenFunction &CGF,
4717                                     PrePostActionTy &Action) {
4718     llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4719                     std::pair<Address, Address>>
4720         UntiedLocalVars;
4721     // Set proper addresses for generated private copies.
4722     OMPPrivateScope Scope(CGF);
4723     // Generate debug info for variables present in shared clause.
4724     if (auto *DI = CGF.getDebugInfo()) {
4725       llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4726           CGF.CapturedStmtInfo->getCaptureFields();
4727       llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4728       if (CaptureFields.size() && ContextValue) {
4729         unsigned CharWidth = CGF.getContext().getCharWidth();
4730         // The shared variables are packed together as members of structure.
4731         // So the address of each shared variable can be computed by adding
4732         // offset of it (within record) to the base address of record. For each
4733         // shared variable, debug intrinsic llvm.dbg.declare is generated with
4734         // appropriate expressions (DIExpression).
4735         // Ex:
4736         //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4737         //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
4738         //            metadata !svar1,
4739         //            metadata !DIExpression(DW_OP_deref))
4740         //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
4741         //            metadata !svar2,
4742         //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4743         for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4744           const VarDecl *SharedVar = It->first;
4745           RecordDecl *CaptureRecord = It->second->getParent();
4746           const ASTRecordLayout &Layout =
4747               CGF.getContext().getASTRecordLayout(CaptureRecord);
4748           unsigned Offset =
4749               Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4750           if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4751             (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4752                                                 CGF.Builder, false);
4753           llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4754           // Get the call dbg.declare instruction we just created and update
4755           // its DIExpression to add offset to base address.
4756           if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
4757             SmallVector<uint64_t, 8> Ops;
4758             // Add offset to the base address if non zero.
4759             if (Offset) {
4760               Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4761               Ops.push_back(Offset);
4762             }
4763             Ops.push_back(llvm::dwarf::DW_OP_deref);
4764             auto &Ctx = DDI->getContext();
4765             llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
4766             Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
4767           }
4768         }
4769       }
4770     }
4771     llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4772     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4773         !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4774       enum { PrivatesParam = 2, CopyFnParam = 3 };
4775       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4776           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4777       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4778           CS->getCapturedDecl()->getParam(PrivatesParam)));
4779       // Map privates.
4780       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4781       llvm::SmallVector<llvm::Value *, 16> CallArgs;
4782       llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4783       CallArgs.push_back(PrivatesPtr);
4784       ParamTypes.push_back(PrivatesPtr->getType());
4785       for (const Expr *E : Data.PrivateVars) {
4786         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4787         Address PrivatePtr = CGF.CreateMemTemp(
4788             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4789         PrivatePtrs.emplace_back(VD, PrivatePtr);
4790         CallArgs.push_back(PrivatePtr.getPointer());
4791         ParamTypes.push_back(PrivatePtr.getType());
4792       }
4793       for (const Expr *E : Data.FirstprivateVars) {
4794         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4795         Address PrivatePtr =
4796             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4797                               ".firstpriv.ptr.addr");
4798         PrivatePtrs.emplace_back(VD, PrivatePtr);
4799         FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4800         CallArgs.push_back(PrivatePtr.getPointer());
4801         ParamTypes.push_back(PrivatePtr.getType());
4802       }
4803       for (const Expr *E : Data.LastprivateVars) {
4804         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4805         Address PrivatePtr =
4806             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4807                               ".lastpriv.ptr.addr");
4808         PrivatePtrs.emplace_back(VD, PrivatePtr);
4809         CallArgs.push_back(PrivatePtr.getPointer());
4810         ParamTypes.push_back(PrivatePtr.getType());
4811       }
4812       for (const VarDecl *VD : Data.PrivateLocals) {
4813         QualType Ty = VD->getType().getNonReferenceType();
4814         if (VD->getType()->isLValueReferenceType())
4815           Ty = CGF.getContext().getPointerType(Ty);
4816         if (isAllocatableDecl(VD))
4817           Ty = CGF.getContext().getPointerType(Ty);
4818         Address PrivatePtr = CGF.CreateMemTemp(
4819             CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4820         auto Result = UntiedLocalVars.insert(
4821             std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4822         // If key exists update in place.
4823         if (Result.second == false)
4824           *Result.first = std::make_pair(
4825               VD, std::make_pair(PrivatePtr, Address::invalid()));
4826         CallArgs.push_back(PrivatePtr.getPointer());
4827         ParamTypes.push_back(PrivatePtr.getType());
4828       }
4829       auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4830                                                ParamTypes, /*isVarArg=*/false);
4831       CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832           CopyFn, CopyFnTy->getPointerTo());
4833       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4834           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4835       for (const auto &Pair : LastprivateDstsOrigs) {
4836         const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4837         DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4838                         /*RefersToEnclosingVariableOrCapture=*/
4839                         CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4840                         Pair.second->getType(), VK_LValue,
4841                         Pair.second->getExprLoc());
4842         Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
4843       }
4844       for (const auto &Pair : PrivatePtrs) {
4845         Address Replacement = Address(
4846             CGF.Builder.CreateLoad(Pair.second),
4847             CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4848             CGF.getContext().getDeclAlign(Pair.first));
4849         Scope.addPrivate(Pair.first, Replacement);
4850         if (auto *DI = CGF.getDebugInfo())
4851           if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4852             (void)DI->EmitDeclareOfAutoVariable(
4853                 Pair.first, Pair.second.getPointer(), CGF.Builder,
4854                 /*UsePointerValue*/ true);
4855       }
4856       // Adjust mapping for internal locals by mapping actual memory instead of
4857       // a pointer to this memory.
4858       for (auto &Pair : UntiedLocalVars) {
4859         QualType VDType = Pair.first->getType().getNonReferenceType();
4860         if (Pair.first->getType()->isLValueReferenceType())
4861           VDType = CGF.getContext().getPointerType(VDType);
4862         if (isAllocatableDecl(Pair.first)) {
4863           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4864           Address Replacement(
4865               Ptr,
4866               CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4867               CGF.getPointerAlign());
4868           Pair.second.first = Replacement;
4869           Ptr = CGF.Builder.CreateLoad(Replacement);
4870           Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4871                                 CGF.getContext().getDeclAlign(Pair.first));
4872           Pair.second.second = Replacement;
4873         } else {
4874           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4875           Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4876                               CGF.getContext().getDeclAlign(Pair.first));
4877           Pair.second.first = Replacement;
4878         }
4879       }
4880     }
4881     if (Data.Reductions) {
4882       OMPPrivateScope FirstprivateScope(CGF);
4883       for (const auto &Pair : FirstprivatePtrs) {
4884         Address Replacement(
4885             CGF.Builder.CreateLoad(Pair.second),
4886             CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4887             CGF.getContext().getDeclAlign(Pair.first));
4888         FirstprivateScope.addPrivate(Pair.first, Replacement);
4889       }
4890       (void)FirstprivateScope.Privatize();
4891       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4892       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4893                              Data.ReductionCopies, Data.ReductionOps);
4894       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4895           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4896       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4897         RedCG.emitSharedOrigLValue(CGF, Cnt);
4898         RedCG.emitAggregateType(CGF, Cnt);
4899         // FIXME: This must removed once the runtime library is fixed.
4900         // Emit required threadprivate variables for
4901         // initializer/combiner/finalizer.
4902         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4903                                                            RedCG, Cnt);
4904         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4905             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4906         Replacement =
4907             Address(CGF.EmitScalarConversion(
4908                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4909                         CGF.getContext().getPointerType(
4910                             Data.ReductionCopies[Cnt]->getType()),
4911                         Data.ReductionCopies[Cnt]->getExprLoc()),
4912                     CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4913                     Replacement.getAlignment());
4914         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4915         Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4916       }
4917     }
4918     // Privatize all private variables except for in_reduction items.
4919     (void)Scope.Privatize();
4920     SmallVector<const Expr *, 4> InRedVars;
4921     SmallVector<const Expr *, 4> InRedPrivs;
4922     SmallVector<const Expr *, 4> InRedOps;
4923     SmallVector<const Expr *, 4> TaskgroupDescriptors;
4924     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4925       auto IPriv = C->privates().begin();
4926       auto IRed = C->reduction_ops().begin();
4927       auto ITD = C->taskgroup_descriptors().begin();
4928       for (const Expr *Ref : C->varlists()) {
4929         InRedVars.emplace_back(Ref);
4930         InRedPrivs.emplace_back(*IPriv);
4931         InRedOps.emplace_back(*IRed);
4932         TaskgroupDescriptors.emplace_back(*ITD);
4933         std::advance(IPriv, 1);
4934         std::advance(IRed, 1);
4935         std::advance(ITD, 1);
4936       }
4937     }
4938     // Privatize in_reduction items here, because taskgroup descriptors must be
4939     // privatized earlier.
4940     OMPPrivateScope InRedScope(CGF);
4941     if (!InRedVars.empty()) {
4942       ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4943       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4944         RedCG.emitSharedOrigLValue(CGF, Cnt);
4945         RedCG.emitAggregateType(CGF, Cnt);
4946         // The taskgroup descriptor variable is always implicit firstprivate and
4947         // privatized already during processing of the firstprivates.
4948         // FIXME: This must removed once the runtime library is fixed.
4949         // Emit required threadprivate variables for
4950         // initializer/combiner/finalizer.
4951         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4952                                                            RedCG, Cnt);
4953         llvm::Value *ReductionsPtr;
4954         if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4955           ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4956                                                TRExpr->getExprLoc());
4957         } else {
4958           ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4959         }
4960         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4961             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4962         Replacement = Address(
4963             CGF.EmitScalarConversion(
4964                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4965                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4966                 InRedPrivs[Cnt]->getExprLoc()),
4967             CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
4968             Replacement.getAlignment());
4969         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4970         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4971       }
4972     }
4973     (void)InRedScope.Privatize();
4974 
4975     CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4976                                                              UntiedLocalVars);
4977     Action.Enter(CGF);
4978     BodyGen(CGF);
4979   };
4980   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4981       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4982       Data.NumberOfParts);
4983   OMPLexicalScope Scope(*this, S, std::nullopt,
4984                         !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4985                             !isOpenMPSimdDirective(S.getDirectiveKind()));
4986   TaskGen(*this, OutlinedFn, Data);
4987 }
4988 
4989 static ImplicitParamDecl *
4990 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4991                                   QualType Ty, CapturedDecl *CD,
4992                                   SourceLocation Loc) {
4993   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4994                                            ImplicitParamDecl::Other);
4995   auto *OrigRef = DeclRefExpr::Create(
4996       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4997       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4998   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4999                                               ImplicitParamDecl::Other);
5000   auto *PrivateRef = DeclRefExpr::Create(
5001       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
5002       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
5003   QualType ElemType = C.getBaseElementType(Ty);
5004   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
5005                                            ImplicitParamDecl::Other);
5006   auto *InitRef = DeclRefExpr::Create(
5007       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
5008       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
5009   PrivateVD->setInitStyle(VarDecl::CInit);
5010   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
5011                                               InitRef, /*BasePath=*/nullptr,
5012                                               VK_PRValue, FPOptionsOverride()));
5013   Data.FirstprivateVars.emplace_back(OrigRef);
5014   Data.FirstprivateCopies.emplace_back(PrivateRef);
5015   Data.FirstprivateInits.emplace_back(InitRef);
5016   return OrigVD;
5017 }
5018 
5019 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5020     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5021     OMPTargetDataInfo &InputInfo) {
5022   // Emit outlined function for task construct.
5023   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5024   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5025   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5026   auto I = CS->getCapturedDecl()->param_begin();
5027   auto PartId = std::next(I);
5028   auto TaskT = std::next(I, 4);
5029   OMPTaskDataTy Data;
5030   // The task is not final.
5031   Data.Final.setInt(/*IntVal=*/false);
5032   // Get list of firstprivate variables.
5033   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5034     auto IRef = C->varlist_begin();
5035     auto IElemInitRef = C->inits().begin();
5036     for (auto *IInit : C->private_copies()) {
5037       Data.FirstprivateVars.push_back(*IRef);
5038       Data.FirstprivateCopies.push_back(IInit);
5039       Data.FirstprivateInits.push_back(*IElemInitRef);
5040       ++IRef;
5041       ++IElemInitRef;
5042     }
5043   }
5044   SmallVector<const Expr *, 4> LHSs;
5045   SmallVector<const Expr *, 4> RHSs;
5046   for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5047     Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5048     Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5049     Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5050     Data.ReductionOps.append(C->reduction_ops().begin(),
5051                              C->reduction_ops().end());
5052     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5053     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5054   }
5055   OMPPrivateScope TargetScope(*this);
5056   VarDecl *BPVD = nullptr;
5057   VarDecl *PVD = nullptr;
5058   VarDecl *SVD = nullptr;
5059   VarDecl *MVD = nullptr;
5060   if (InputInfo.NumberOfTargetItems > 0) {
5061     auto *CD = CapturedDecl::Create(
5062         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5063     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5064     QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5065         getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
5066         /*IndexTypeQuals=*/0);
5067     BPVD = createImplicitFirstprivateForType(
5068         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5069     PVD = createImplicitFirstprivateForType(
5070         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5071     QualType SizesType = getContext().getConstantArrayType(
5072         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5073         ArrSize, nullptr, ArrayType::Normal,
5074         /*IndexTypeQuals=*/0);
5075     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
5076                                             S.getBeginLoc());
5077     TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
5078     TargetScope.addPrivate(PVD, InputInfo.PointersArray);
5079     TargetScope.addPrivate(SVD, InputInfo.SizesArray);
5080     // If there is no user-defined mapper, the mapper array will be nullptr. In
5081     // this case, we don't need to privatize it.
5082     if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5083             InputInfo.MappersArray.getPointer())) {
5084       MVD = createImplicitFirstprivateForType(
5085           getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5086       TargetScope.addPrivate(MVD, InputInfo.MappersArray);
5087     }
5088   }
5089   (void)TargetScope.Privatize();
5090   buildDependences(S, Data);
5091   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5092                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5093     // Set proper addresses for generated private copies.
5094     OMPPrivateScope Scope(CGF);
5095     if (!Data.FirstprivateVars.empty()) {
5096       enum { PrivatesParam = 2, CopyFnParam = 3 };
5097       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5098           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5099       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5100           CS->getCapturedDecl()->getParam(PrivatesParam)));
5101       // Map privates.
5102       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5103       llvm::SmallVector<llvm::Value *, 16> CallArgs;
5104       llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5105       CallArgs.push_back(PrivatesPtr);
5106       ParamTypes.push_back(PrivatesPtr->getType());
5107       for (const Expr *E : Data.FirstprivateVars) {
5108         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5109         Address PrivatePtr =
5110             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
5111                               ".firstpriv.ptr.addr");
5112         PrivatePtrs.emplace_back(VD, PrivatePtr);
5113         CallArgs.push_back(PrivatePtr.getPointer());
5114         ParamTypes.push_back(PrivatePtr.getType());
5115       }
5116       auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5117                                                ParamTypes, /*isVarArg=*/false);
5118       CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5119           CopyFn, CopyFnTy->getPointerTo());
5120       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5121           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
5122       for (const auto &Pair : PrivatePtrs) {
5123         Address Replacement(
5124             CGF.Builder.CreateLoad(Pair.second),
5125             CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5126             CGF.getContext().getDeclAlign(Pair.first));
5127         Scope.addPrivate(Pair.first, Replacement);
5128       }
5129     }
5130     CGF.processInReduction(S, Data, CGF, CS, Scope);
5131     if (InputInfo.NumberOfTargetItems > 0) {
5132       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5133           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
5134       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5135           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
5136       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5137           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
5138       // If MVD is nullptr, the mapper array is not privatized
5139       if (MVD)
5140         InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5141             CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
5142     }
5143 
5144     Action.Enter(CGF);
5145     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5146     BodyGen(CGF);
5147   };
5148   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5149       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
5150       Data.NumberOfParts);
5151   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5152   IntegerLiteral IfCond(getContext(), TrueOrFalse,
5153                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5154                         SourceLocation());
5155   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5156                                       SharedsTy, CapturedStruct, &IfCond, Data);
5157 }
5158 
5159 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5160                                          OMPTaskDataTy &Data,
5161                                          CodeGenFunction &CGF,
5162                                          const CapturedStmt *CS,
5163                                          OMPPrivateScope &Scope) {
5164   if (Data.Reductions) {
5165     OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5166     OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5167     ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5168                            Data.ReductionCopies, Data.ReductionOps);
5169     llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5170         CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5171     for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5172       RedCG.emitSharedOrigLValue(CGF, Cnt);
5173       RedCG.emitAggregateType(CGF, Cnt);
5174       // FIXME: This must removed once the runtime library is fixed.
5175       // Emit required threadprivate variables for
5176       // initializer/combiner/finalizer.
5177       CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5178                                                          RedCG, Cnt);
5179       Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5180           CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5181       Replacement =
5182           Address(CGF.EmitScalarConversion(
5183                       Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5184                       CGF.getContext().getPointerType(
5185                           Data.ReductionCopies[Cnt]->getType()),
5186                       Data.ReductionCopies[Cnt]->getExprLoc()),
5187                   CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5188                   Replacement.getAlignment());
5189       Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5190       Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5191     }
5192   }
5193   (void)Scope.Privatize();
5194   SmallVector<const Expr *, 4> InRedVars;
5195   SmallVector<const Expr *, 4> InRedPrivs;
5196   SmallVector<const Expr *, 4> InRedOps;
5197   SmallVector<const Expr *, 4> TaskgroupDescriptors;
5198   for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5199     auto IPriv = C->privates().begin();
5200     auto IRed = C->reduction_ops().begin();
5201     auto ITD = C->taskgroup_descriptors().begin();
5202     for (const Expr *Ref : C->varlists()) {
5203       InRedVars.emplace_back(Ref);
5204       InRedPrivs.emplace_back(*IPriv);
5205       InRedOps.emplace_back(*IRed);
5206       TaskgroupDescriptors.emplace_back(*ITD);
5207       std::advance(IPriv, 1);
5208       std::advance(IRed, 1);
5209       std::advance(ITD, 1);
5210     }
5211   }
5212   OMPPrivateScope InRedScope(CGF);
5213   if (!InRedVars.empty()) {
5214     ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5215     for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5216       RedCG.emitSharedOrigLValue(CGF, Cnt);
5217       RedCG.emitAggregateType(CGF, Cnt);
5218       // FIXME: This must removed once the runtime library is fixed.
5219       // Emit required threadprivate variables for
5220       // initializer/combiner/finalizer.
5221       CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5222                                                          RedCG, Cnt);
5223       llvm::Value *ReductionsPtr;
5224       if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5225         ReductionsPtr =
5226             CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
5227       } else {
5228         ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5229       }
5230       Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5231           CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5232       Replacement = Address(
5233           CGF.EmitScalarConversion(
5234               Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5235               CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5236               InRedPrivs[Cnt]->getExprLoc()),
5237           CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5238           Replacement.getAlignment());
5239       Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5240       InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5241     }
5242   }
5243   (void)InRedScope.Privatize();
5244 }
5245 
5246 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5247   // Emit outlined function for task construct.
5248   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5249   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5250   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5251   const Expr *IfCond = nullptr;
5252   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5253     if (C->getNameModifier() == OMPD_unknown ||
5254         C->getNameModifier() == OMPD_task) {
5255       IfCond = C->getCondition();
5256       break;
5257     }
5258   }
5259 
5260   OMPTaskDataTy Data;
5261   // Check if we should emit tied or untied task.
5262   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5263   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5264     CGF.EmitStmt(CS->getCapturedStmt());
5265   };
5266   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5267                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5268                             const OMPTaskDataTy &Data) {
5269     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5270                                             SharedsTy, CapturedStruct, IfCond,
5271                                             Data);
5272   };
5273   auto LPCRegion =
5274       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5275   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5276 }
5277 
5278 void CodeGenFunction::EmitOMPTaskyieldDirective(
5279     const OMPTaskyieldDirective &S) {
5280   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5281 }
5282 
5283 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5284   const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5285   Expr *ME = MC ? MC->getMessageString() : nullptr;
5286   const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5287   bool IsFatal = false;
5288   if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5289     IsFatal = true;
5290   CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5291 }
5292 
5293 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5294   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5295 }
5296 
5297 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5298   OMPTaskDataTy Data;
5299   // Build list of dependences
5300   buildDependences(S, Data);
5301   Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5302   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5303 }
5304 
5305 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5306   return T.clauses().empty();
5307 }
5308 
5309 void CodeGenFunction::EmitOMPTaskgroupDirective(
5310     const OMPTaskgroupDirective &S) {
5311   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5312   if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
5313     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5314     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5315     InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5316                            AllocaInsertPt->getIterator());
5317 
5318     auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5319                                InsertPointTy CodeGenIP) {
5320       Builder.restoreIP(CodeGenIP);
5321       EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5322     };
5323     CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5324     if (!CapturedStmtInfo)
5325       CapturedStmtInfo = &CapStmtInfo;
5326     Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
5327     return;
5328   }
5329   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5330     Action.Enter(CGF);
5331     if (const Expr *E = S.getReductionRef()) {
5332       SmallVector<const Expr *, 4> LHSs;
5333       SmallVector<const Expr *, 4> RHSs;
5334       OMPTaskDataTy Data;
5335       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5336         Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5337         Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5338         Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5339         Data.ReductionOps.append(C->reduction_ops().begin(),
5340                                  C->reduction_ops().end());
5341         LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5342         RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5343       }
5344       llvm::Value *ReductionDesc =
5345           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5346                                                            LHSs, RHSs, Data);
5347       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5348       CGF.EmitVarDecl(*VD);
5349       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5350                             /*Volatile=*/false, E->getType());
5351     }
5352     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5353   };
5354   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5355 }
5356 
5357 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5358   llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5359                                 ? llvm::AtomicOrdering::NotAtomic
5360                                 : llvm::AtomicOrdering::AcquireRelease;
5361   CGM.getOpenMPRuntime().emitFlush(
5362       *this,
5363       [&S]() -> ArrayRef<const Expr *> {
5364         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5365           return llvm::ArrayRef(FlushClause->varlist_begin(),
5366                                 FlushClause->varlist_end());
5367         return std::nullopt;
5368       }(),
5369       S.getBeginLoc(), AO);
5370 }
5371 
5372 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5373   const auto *DO = S.getSingleClause<OMPDepobjClause>();
5374   LValue DOLVal = EmitLValue(DO->getDepobj());
5375   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5376     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5377                                            DC->getModifier());
5378     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5379     Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5380         *this, Dependencies, DC->getBeginLoc());
5381     EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
5382     return;
5383   }
5384   if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5385     CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5386     return;
5387   }
5388   if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5389     CGM.getOpenMPRuntime().emitUpdateClause(
5390         *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5391     return;
5392   }
5393 }
5394 
5395 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5396   if (!OMPParentLoopDirectiveForScan)
5397     return;
5398   const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5399   bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5400   SmallVector<const Expr *, 4> Shareds;
5401   SmallVector<const Expr *, 4> Privates;
5402   SmallVector<const Expr *, 4> LHSs;
5403   SmallVector<const Expr *, 4> RHSs;
5404   SmallVector<const Expr *, 4> ReductionOps;
5405   SmallVector<const Expr *, 4> CopyOps;
5406   SmallVector<const Expr *, 4> CopyArrayTemps;
5407   SmallVector<const Expr *, 4> CopyArrayElems;
5408   for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5409     if (C->getModifier() != OMPC_REDUCTION_inscan)
5410       continue;
5411     Shareds.append(C->varlist_begin(), C->varlist_end());
5412     Privates.append(C->privates().begin(), C->privates().end());
5413     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5414     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5415     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
5416     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
5417     CopyArrayTemps.append(C->copy_array_temps().begin(),
5418                           C->copy_array_temps().end());
5419     CopyArrayElems.append(C->copy_array_elems().begin(),
5420                           C->copy_array_elems().end());
5421   }
5422   if (ParentDir.getDirectiveKind() == OMPD_simd ||
5423       (getLangOpts().OpenMPSimd &&
5424        isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
5425     // For simd directive and simd-based directives in simd only mode, use the
5426     // following codegen:
5427     // int x = 0;
5428     // #pragma omp simd reduction(inscan, +: x)
5429     // for (..) {
5430     //   <first part>
5431     //   #pragma omp scan inclusive(x)
5432     //   <second part>
5433     //  }
5434     // is transformed to:
5435     // int x = 0;
5436     // for (..) {
5437     //   int x_priv = 0;
5438     //   <first part>
5439     //   x = x_priv + x;
5440     //   x_priv = x;
5441     //   <second part>
5442     // }
5443     // and
5444     // int x = 0;
5445     // #pragma omp simd reduction(inscan, +: x)
5446     // for (..) {
5447     //   <first part>
5448     //   #pragma omp scan exclusive(x)
5449     //   <second part>
5450     // }
5451     // to
5452     // int x = 0;
5453     // for (..) {
5454     //   int x_priv = 0;
5455     //   <second part>
5456     //   int temp = x;
5457     //   x = x_priv + x;
5458     //   x_priv = temp;
5459     //   <first part>
5460     // }
5461     llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
5462     EmitBranch(IsInclusive
5463                    ? OMPScanReduce
5464                    : BreakContinueStack.back().ContinueBlock.getBlock());
5465     EmitBlock(OMPScanDispatch);
5466     {
5467       // New scope for correct construction/destruction of temp variables for
5468       // exclusive scan.
5469       LexicalScope Scope(*this, S.getSourceRange());
5470       EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5471       EmitBlock(OMPScanReduce);
5472       if (!IsInclusive) {
5473         // Create temp var and copy LHS value to this temp value.
5474         // TMP = LHS;
5475         for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5476           const Expr *PrivateExpr = Privates[I];
5477           const Expr *TempExpr = CopyArrayTemps[I];
5478           EmitAutoVarDecl(
5479               *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
5480           LValue DestLVal = EmitLValue(TempExpr);
5481           LValue SrcLVal = EmitLValue(LHSs[I]);
5482           EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5483                       SrcLVal.getAddress(*this),
5484                       cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5485                       cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5486                       CopyOps[I]);
5487         }
5488       }
5489       CGM.getOpenMPRuntime().emitReduction(
5490           *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5491           {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5492       for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5493         const Expr *PrivateExpr = Privates[I];
5494         LValue DestLVal;
5495         LValue SrcLVal;
5496         if (IsInclusive) {
5497           DestLVal = EmitLValue(RHSs[I]);
5498           SrcLVal = EmitLValue(LHSs[I]);
5499         } else {
5500           const Expr *TempExpr = CopyArrayTemps[I];
5501           DestLVal = EmitLValue(RHSs[I]);
5502           SrcLVal = EmitLValue(TempExpr);
5503         }
5504         EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5505                     SrcLVal.getAddress(*this),
5506                     cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5507                     cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5508                     CopyOps[I]);
5509       }
5510     }
5511     EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5512     OMPScanExitBlock = IsInclusive
5513                            ? BreakContinueStack.back().ContinueBlock.getBlock()
5514                            : OMPScanReduce;
5515     EmitBlock(OMPAfterScanBlock);
5516     return;
5517   }
5518   if (!IsInclusive) {
5519     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5520     EmitBlock(OMPScanExitBlock);
5521   }
5522   if (OMPFirstScanLoop) {
5523     // Emit buffer[i] = red; at the end of the input phase.
5524     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5525                              .getIterationVariable()
5526                              ->IgnoreParenImpCasts();
5527     LValue IdxLVal = EmitLValue(IVExpr);
5528     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5529     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5530     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5531       const Expr *PrivateExpr = Privates[I];
5532       const Expr *OrigExpr = Shareds[I];
5533       const Expr *CopyArrayElem = CopyArrayElems[I];
5534       OpaqueValueMapping IdxMapping(
5535           *this,
5536           cast<OpaqueValueExpr>(
5537               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5538           RValue::get(IdxVal));
5539       LValue DestLVal = EmitLValue(CopyArrayElem);
5540       LValue SrcLVal = EmitLValue(OrigExpr);
5541       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5542                   SrcLVal.getAddress(*this),
5543                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5544                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5545                   CopyOps[I]);
5546     }
5547   }
5548   EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5549   if (IsInclusive) {
5550     EmitBlock(OMPScanExitBlock);
5551     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5552   }
5553   EmitBlock(OMPScanDispatch);
5554   if (!OMPFirstScanLoop) {
5555     // Emit red = buffer[i]; at the entrance to the scan phase.
5556     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5557                              .getIterationVariable()
5558                              ->IgnoreParenImpCasts();
5559     LValue IdxLVal = EmitLValue(IVExpr);
5560     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5561     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5562     llvm::BasicBlock *ExclusiveExitBB = nullptr;
5563     if (!IsInclusive) {
5564       llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5565       ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5566       llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5567       Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5568       EmitBlock(ContBB);
5569       // Use idx - 1 iteration for exclusive scan.
5570       IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5571     }
5572     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5573       const Expr *PrivateExpr = Privates[I];
5574       const Expr *OrigExpr = Shareds[I];
5575       const Expr *CopyArrayElem = CopyArrayElems[I];
5576       OpaqueValueMapping IdxMapping(
5577           *this,
5578           cast<OpaqueValueExpr>(
5579               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5580           RValue::get(IdxVal));
5581       LValue SrcLVal = EmitLValue(CopyArrayElem);
5582       LValue DestLVal = EmitLValue(OrigExpr);
5583       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5584                   SrcLVal.getAddress(*this),
5585                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5586                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5587                   CopyOps[I]);
5588     }
5589     if (!IsInclusive) {
5590       EmitBlock(ExclusiveExitBB);
5591     }
5592   }
5593   EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5594                                                : OMPAfterScanBlock);
5595   EmitBlock(OMPAfterScanBlock);
5596 }
5597 
5598 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5599                                             const CodeGenLoopTy &CodeGenLoop,
5600                                             Expr *IncExpr) {
5601   // Emit the loop iteration variable.
5602   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5603   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5604   EmitVarDecl(*IVDecl);
5605 
5606   // Emit the iterations count variable.
5607   // If it is not a variable, Sema decided to calculate iterations count on each
5608   // iteration (e.g., it is foldable into a constant).
5609   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5610     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5611     // Emit calculation of the iterations count.
5612     EmitIgnoredExpr(S.getCalcLastIteration());
5613   }
5614 
5615   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5616 
5617   bool HasLastprivateClause = false;
5618   // Check pre-condition.
5619   {
5620     OMPLoopScope PreInitScope(*this, S);
5621     // Skip the entire loop if we don't meet the precondition.
5622     // If the condition constant folds and can be elided, avoid emitting the
5623     // whole loop.
5624     bool CondConstant;
5625     llvm::BasicBlock *ContBlock = nullptr;
5626     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5627       if (!CondConstant)
5628         return;
5629     } else {
5630       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5631       ContBlock = createBasicBlock("omp.precond.end");
5632       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5633                   getProfileCount(&S));
5634       EmitBlock(ThenBlock);
5635       incrementProfileCounter(&S);
5636     }
5637 
5638     emitAlignedClause(*this, S);
5639     // Emit 'then' code.
5640     {
5641       // Emit helper vars inits.
5642 
5643       LValue LB = EmitOMPHelperVar(
5644           *this, cast<DeclRefExpr>(
5645                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5646                           ? S.getCombinedLowerBoundVariable()
5647                           : S.getLowerBoundVariable())));
5648       LValue UB = EmitOMPHelperVar(
5649           *this, cast<DeclRefExpr>(
5650                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5651                           ? S.getCombinedUpperBoundVariable()
5652                           : S.getUpperBoundVariable())));
5653       LValue ST =
5654           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5655       LValue IL =
5656           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5657 
5658       OMPPrivateScope LoopScope(*this);
5659       if (EmitOMPFirstprivateClause(S, LoopScope)) {
5660         // Emit implicit barrier to synchronize threads and avoid data races
5661         // on initialization of firstprivate variables and post-update of
5662         // lastprivate variables.
5663         CGM.getOpenMPRuntime().emitBarrierCall(
5664             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5665             /*ForceSimpleCall=*/true);
5666       }
5667       EmitOMPPrivateClause(S, LoopScope);
5668       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5669           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5670           !isOpenMPTeamsDirective(S.getDirectiveKind()))
5671         EmitOMPReductionClauseInit(S, LoopScope);
5672       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5673       EmitOMPPrivateLoopCounters(S, LoopScope);
5674       (void)LoopScope.Privatize();
5675       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5676         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5677 
5678       // Detect the distribute schedule kind and chunk.
5679       llvm::Value *Chunk = nullptr;
5680       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5681       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5682         ScheduleKind = C->getDistScheduleKind();
5683         if (const Expr *Ch = C->getChunkSize()) {
5684           Chunk = EmitScalarExpr(Ch);
5685           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5686                                        S.getIterationVariable()->getType(),
5687                                        S.getBeginLoc());
5688         }
5689       } else {
5690         // Default behaviour for dist_schedule clause.
5691         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5692             *this, S, ScheduleKind, Chunk);
5693       }
5694       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5695       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5696 
5697       // OpenMP [2.10.8, distribute Construct, Description]
5698       // If dist_schedule is specified, kind must be static. If specified,
5699       // iterations are divided into chunks of size chunk_size, chunks are
5700       // assigned to the teams of the league in a round-robin fashion in the
5701       // order of the team number. When no chunk_size is specified, the
5702       // iteration space is divided into chunks that are approximately equal
5703       // in size, and at most one chunk is distributed to each team of the
5704       // league. The size of the chunks is unspecified in this case.
5705       bool StaticChunked =
5706           RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5707           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5708       if (RT.isStaticNonchunked(ScheduleKind,
5709                                 /* Chunked */ Chunk != nullptr) ||
5710           StaticChunked) {
5711         CGOpenMPRuntime::StaticRTInput StaticInit(
5712             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5713             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5714             StaticChunked ? Chunk : nullptr);
5715         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5716                                     StaticInit);
5717         JumpDest LoopExit =
5718             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5719         // UB = min(UB, GlobalUB);
5720         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5721                             ? S.getCombinedEnsureUpperBound()
5722                             : S.getEnsureUpperBound());
5723         // IV = LB;
5724         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5725                             ? S.getCombinedInit()
5726                             : S.getInit());
5727 
5728         const Expr *Cond =
5729             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5730                 ? S.getCombinedCond()
5731                 : S.getCond();
5732 
5733         if (StaticChunked)
5734           Cond = S.getCombinedDistCond();
5735 
5736         // For static unchunked schedules generate:
5737         //
5738         //  1. For distribute alone, codegen
5739         //    while (idx <= UB) {
5740         //      BODY;
5741         //      ++idx;
5742         //    }
5743         //
5744         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
5745         //    while (idx <= UB) {
5746         //      <CodeGen rest of pragma>(LB, UB);
5747         //      idx += ST;
5748         //    }
5749         //
5750         // For static chunk one schedule generate:
5751         //
5752         // while (IV <= GlobalUB) {
5753         //   <CodeGen rest of pragma>(LB, UB);
5754         //   LB += ST;
5755         //   UB += ST;
5756         //   UB = min(UB, GlobalUB);
5757         //   IV = LB;
5758         // }
5759         //
5760         emitCommonSimdLoop(
5761             *this, S,
5762             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5763               if (isOpenMPSimdDirective(S.getDirectiveKind()))
5764                 CGF.EmitOMPSimdInit(S);
5765             },
5766             [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5767              StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5768               CGF.EmitOMPInnerLoop(
5769                   S, LoopScope.requiresCleanups(), Cond, IncExpr,
5770                   [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5771                     CodeGenLoop(CGF, S, LoopExit);
5772                   },
5773                   [&S, StaticChunked](CodeGenFunction &CGF) {
5774                     if (StaticChunked) {
5775                       CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5776                       CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5777                       CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5778                       CGF.EmitIgnoredExpr(S.getCombinedInit());
5779                     }
5780                   });
5781             });
5782         EmitBlock(LoopExit.getBlock());
5783         // Tell the runtime we are done.
5784         RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5785       } else {
5786         // Emit the outer loop, which requests its work chunk [LB..UB] from
5787         // runtime and runs the inner loop to process it.
5788         const OMPLoopArguments LoopArguments = {
5789             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5790             IL.getAddress(*this), Chunk};
5791         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5792                                    CodeGenLoop);
5793       }
5794       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5795         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5796           return CGF.Builder.CreateIsNotNull(
5797               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5798         });
5799       }
5800       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5801           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5802           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5803         EmitOMPReductionClauseFinal(S, OMPD_simd);
5804         // Emit post-update of the reduction variables if IsLastIter != 0.
5805         emitPostUpdateForReductionClause(
5806             *this, S, [IL, &S](CodeGenFunction &CGF) {
5807               return CGF.Builder.CreateIsNotNull(
5808                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5809             });
5810       }
5811       // Emit final copy of the lastprivate variables if IsLastIter != 0.
5812       if (HasLastprivateClause) {
5813         EmitOMPLastprivateClauseFinal(
5814             S, /*NoFinals=*/false,
5815             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5816       }
5817     }
5818 
5819     // We're now done with the loop, so jump to the continuation block.
5820     if (ContBlock) {
5821       EmitBranch(ContBlock);
5822       EmitBlock(ContBlock, true);
5823     }
5824   }
5825 }
5826 
5827 void CodeGenFunction::EmitOMPDistributeDirective(
5828     const OMPDistributeDirective &S) {
5829   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5830     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5831   };
5832   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5833   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5834 }
5835 
5836 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5837                                                    const CapturedStmt *S,
5838                                                    SourceLocation Loc) {
5839   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5840   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5841   CGF.CapturedStmtInfo = &CapStmtInfo;
5842   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5843   Fn->setDoesNotRecurse();
5844   return Fn;
5845 }
5846 
5847 template <typename T>
5848 static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5849                           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5850                           llvm::OpenMPIRBuilder &OMPBuilder) {
5851 
5852   unsigned NumLoops = C->getNumLoops();
5853   QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5854       /*DestWidth=*/64, /*Signed=*/1);
5855   llvm::SmallVector<llvm::Value *> StoreValues;
5856   for (unsigned I = 0; I < NumLoops; I++) {
5857     const Expr *CounterVal = C->getLoopData(I);
5858     assert(CounterVal);
5859     llvm::Value *StoreValue = CGF.EmitScalarConversion(
5860         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5861         CounterVal->getExprLoc());
5862     StoreValues.emplace_back(StoreValue);
5863   }
5864   OMPDoacrossKind<T> ODK;
5865   bool IsDependSource = ODK.isSource(C);
5866   CGF.Builder.restoreIP(
5867       OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
5868                                      StoreValues, ".cnt.addr", IsDependSource));
5869 }
5870 
5871 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5872   if (CGM.getLangOpts().OpenMPIRBuilder) {
5873     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5874     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5875 
5876     if (S.hasClausesOfKind<OMPDependClause>() ||
5877         S.hasClausesOfKind<OMPDoacrossClause>()) {
5878       // The ordered directive with depend clause.
5879       assert(!S.hasAssociatedStmt() && "No associated statement must be in "
5880                                        "ordered depend|doacross construct.");
5881       InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5882                              AllocaInsertPt->getIterator());
5883       for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5884         emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5885       for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5886         emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5887     } else {
5888       // The ordered directive with threads or simd clause, or without clause.
5889       // Without clause, it behaves as if the threads clause is specified.
5890       const auto *C = S.getSingleClause<OMPSIMDClause>();
5891 
5892       auto FiniCB = [this](InsertPointTy IP) {
5893         OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
5894       };
5895 
5896       auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5897                                      InsertPointTy CodeGenIP) {
5898         Builder.restoreIP(CodeGenIP);
5899 
5900         const CapturedStmt *CS = S.getInnermostCapturedStmt();
5901         if (C) {
5902           llvm::BasicBlock *FiniBB = splitBBWithSuffix(
5903               Builder, /*CreateBranch=*/false, ".ordered.after");
5904           llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5905           GenerateOpenMPCapturedVars(*CS, CapturedVars);
5906           llvm::Function *OutlinedFn =
5907               emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5908           assert(S.getBeginLoc().isValid() &&
5909                  "Outlined function call location must be valid.");
5910           ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
5911           OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
5912                                                OutlinedFn, CapturedVars);
5913         } else {
5914           OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5915               *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
5916         }
5917       };
5918 
5919       OMPLexicalScope Scope(*this, S, OMPD_unknown);
5920       Builder.restoreIP(
5921           OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
5922     }
5923     return;
5924   }
5925 
5926   if (S.hasClausesOfKind<OMPDependClause>()) {
5927     assert(!S.hasAssociatedStmt() &&
5928            "No associated statement must be in ordered depend construct.");
5929     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5930       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5931     return;
5932   }
5933   if (S.hasClausesOfKind<OMPDoacrossClause>()) {
5934     assert(!S.hasAssociatedStmt() &&
5935            "No associated statement must be in ordered doacross construct.");
5936     for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5937       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5938     return;
5939   }
5940   const auto *C = S.getSingleClause<OMPSIMDClause>();
5941   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5942                                  PrePostActionTy &Action) {
5943     const CapturedStmt *CS = S.getInnermostCapturedStmt();
5944     if (C) {
5945       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5946       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5947       llvm::Function *OutlinedFn =
5948           emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5949       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5950                                                       OutlinedFn, CapturedVars);
5951     } else {
5952       Action.Enter(CGF);
5953       CGF.EmitStmt(CS->getCapturedStmt());
5954     }
5955   };
5956   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5957   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5958 }
5959 
5960 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5961                                          QualType SrcType, QualType DestType,
5962                                          SourceLocation Loc) {
5963   assert(CGF.hasScalarEvaluationKind(DestType) &&
5964          "DestType must have scalar evaluation kind.");
5965   assert(!Val.isAggregate() && "Must be a scalar or complex.");
5966   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5967                                                    DestType, Loc)
5968                         : CGF.EmitComplexToScalarConversion(
5969                               Val.getComplexVal(), SrcType, DestType, Loc);
5970 }
5971 
5972 static CodeGenFunction::ComplexPairTy
5973 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5974                       QualType DestType, SourceLocation Loc) {
5975   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5976          "DestType must have complex evaluation kind.");
5977   CodeGenFunction::ComplexPairTy ComplexVal;
5978   if (Val.isScalar()) {
5979     // Convert the input element to the element type of the complex.
5980     QualType DestElementType =
5981         DestType->castAs<ComplexType>()->getElementType();
5982     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5983         Val.getScalarVal(), SrcType, DestElementType, Loc);
5984     ComplexVal = CodeGenFunction::ComplexPairTy(
5985         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5986   } else {
5987     assert(Val.isComplex() && "Must be a scalar or complex.");
5988     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5989     QualType DestElementType =
5990         DestType->castAs<ComplexType>()->getElementType();
5991     ComplexVal.first = CGF.EmitScalarConversion(
5992         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5993     ComplexVal.second = CGF.EmitScalarConversion(
5994         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5995   }
5996   return ComplexVal;
5997 }
5998 
5999 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6000                                   LValue LVal, RValue RVal) {
6001   if (LVal.isGlobalReg())
6002     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
6003   else
6004     CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
6005 }
6006 
6007 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6008                                    llvm::AtomicOrdering AO, LValue LVal,
6009                                    SourceLocation Loc) {
6010   if (LVal.isGlobalReg())
6011     return CGF.EmitLoadOfLValue(LVal, Loc);
6012   return CGF.EmitAtomicLoad(
6013       LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
6014       LVal.isVolatile());
6015 }
6016 
6017 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6018                                          QualType RValTy, SourceLocation Loc) {
6019   switch (getEvaluationKind(LVal.getType())) {
6020   case TEK_Scalar:
6021     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
6022                                *this, RVal, RValTy, LVal.getType(), Loc)),
6023                            LVal);
6024     break;
6025   case TEK_Complex:
6026     EmitStoreOfComplex(
6027         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
6028         /*isInit=*/false);
6029     break;
6030   case TEK_Aggregate:
6031     llvm_unreachable("Must be a scalar or complex.");
6032   }
6033 }
6034 
6035 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6036                                   const Expr *X, const Expr *V,
6037                                   SourceLocation Loc) {
6038   // v = x;
6039   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6040   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6041   LValue XLValue = CGF.EmitLValue(X);
6042   LValue VLValue = CGF.EmitLValue(V);
6043   RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
6044   // OpenMP, 2.17.7, atomic Construct
6045   // If the read or capture clause is specified and the acquire, acq_rel, or
6046   // seq_cst clause is specified then the strong flush on exit from the atomic
6047   // operation is also an acquire flush.
6048   switch (AO) {
6049   case llvm::AtomicOrdering::Acquire:
6050   case llvm::AtomicOrdering::AcquireRelease:
6051   case llvm::AtomicOrdering::SequentiallyConsistent:
6052     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6053                                          llvm::AtomicOrdering::Acquire);
6054     break;
6055   case llvm::AtomicOrdering::Monotonic:
6056   case llvm::AtomicOrdering::Release:
6057     break;
6058   case llvm::AtomicOrdering::NotAtomic:
6059   case llvm::AtomicOrdering::Unordered:
6060     llvm_unreachable("Unexpected ordering.");
6061   }
6062   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6063   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6064 }
6065 
6066 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6067                                    llvm::AtomicOrdering AO, const Expr *X,
6068                                    const Expr *E, SourceLocation Loc) {
6069   // x = expr;
6070   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6071   emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6072   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6073   // OpenMP, 2.17.7, atomic Construct
6074   // If the write, update, or capture clause is specified and the release,
6075   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6076   // the atomic operation is also a release flush.
6077   switch (AO) {
6078   case llvm::AtomicOrdering::Release:
6079   case llvm::AtomicOrdering::AcquireRelease:
6080   case llvm::AtomicOrdering::SequentiallyConsistent:
6081     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6082                                          llvm::AtomicOrdering::Release);
6083     break;
6084   case llvm::AtomicOrdering::Acquire:
6085   case llvm::AtomicOrdering::Monotonic:
6086     break;
6087   case llvm::AtomicOrdering::NotAtomic:
6088   case llvm::AtomicOrdering::Unordered:
6089     llvm_unreachable("Unexpected ordering.");
6090   }
6091 }
6092 
6093 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6094                                                 RValue Update,
6095                                                 BinaryOperatorKind BO,
6096                                                 llvm::AtomicOrdering AO,
6097                                                 bool IsXLHSInRHSPart) {
6098   ASTContext &Context = CGF.getContext();
6099   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6100   // expression is simple and atomic is allowed for the given type for the
6101   // target platform.
6102   if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6103       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
6104        (Update.getScalarVal()->getType() !=
6105         X.getAddress(CGF).getElementType())) ||
6106       !Context.getTargetInfo().hasBuiltinAtomic(
6107           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
6108     return std::make_pair(false, RValue::get(nullptr));
6109 
6110   auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6111     if (T->isIntegerTy())
6112       return true;
6113 
6114     if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6115       return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
6116 
6117     return false;
6118   };
6119 
6120   if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6121       !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
6122     return std::make_pair(false, RValue::get(nullptr));
6123 
6124   bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
6125   llvm::AtomicRMWInst::BinOp RMWOp;
6126   switch (BO) {
6127   case BO_Add:
6128     RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6129     break;
6130   case BO_Sub:
6131     if (!IsXLHSInRHSPart)
6132       return std::make_pair(false, RValue::get(nullptr));
6133     RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6134     break;
6135   case BO_And:
6136     RMWOp = llvm::AtomicRMWInst::And;
6137     break;
6138   case BO_Or:
6139     RMWOp = llvm::AtomicRMWInst::Or;
6140     break;
6141   case BO_Xor:
6142     RMWOp = llvm::AtomicRMWInst::Xor;
6143     break;
6144   case BO_LT:
6145     if (IsInteger)
6146       RMWOp = X.getType()->hasSignedIntegerRepresentation()
6147                   ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6148                                      : llvm::AtomicRMWInst::Max)
6149                   : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6150                                      : llvm::AtomicRMWInst::UMax);
6151     else
6152       RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6153                               : llvm::AtomicRMWInst::FMax;
6154     break;
6155   case BO_GT:
6156     if (IsInteger)
6157       RMWOp = X.getType()->hasSignedIntegerRepresentation()
6158                   ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6159                                      : llvm::AtomicRMWInst::Min)
6160                   : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6161                                      : llvm::AtomicRMWInst::UMin);
6162     else
6163       RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6164                               : llvm::AtomicRMWInst::FMin;
6165     break;
6166   case BO_Assign:
6167     RMWOp = llvm::AtomicRMWInst::Xchg;
6168     break;
6169   case BO_Mul:
6170   case BO_Div:
6171   case BO_Rem:
6172   case BO_Shl:
6173   case BO_Shr:
6174   case BO_LAnd:
6175   case BO_LOr:
6176     return std::make_pair(false, RValue::get(nullptr));
6177   case BO_PtrMemD:
6178   case BO_PtrMemI:
6179   case BO_LE:
6180   case BO_GE:
6181   case BO_EQ:
6182   case BO_NE:
6183   case BO_Cmp:
6184   case BO_AddAssign:
6185   case BO_SubAssign:
6186   case BO_AndAssign:
6187   case BO_OrAssign:
6188   case BO_XorAssign:
6189   case BO_MulAssign:
6190   case BO_DivAssign:
6191   case BO_RemAssign:
6192   case BO_ShlAssign:
6193   case BO_ShrAssign:
6194   case BO_Comma:
6195     llvm_unreachable("Unsupported atomic update operation");
6196   }
6197   llvm::Value *UpdateVal = Update.getScalarVal();
6198   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
6199     if (IsInteger)
6200       UpdateVal = CGF.Builder.CreateIntCast(
6201           IC, X.getAddress(CGF).getElementType(),
6202           X.getType()->hasSignedIntegerRepresentation());
6203     else
6204       UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
6205                                          X.getAddress(CGF).getElementType());
6206   }
6207   llvm::Value *Res =
6208       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
6209   return std::make_pair(true, RValue::get(Res));
6210 }
6211 
6212 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6213     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6214     llvm::AtomicOrdering AO, SourceLocation Loc,
6215     const llvm::function_ref<RValue(RValue)> CommonGen) {
6216   // Update expressions are allowed to have the following forms:
6217   // x binop= expr; -> xrval + expr;
6218   // x++, ++x -> xrval + 1;
6219   // x--, --x -> xrval - 1;
6220   // x = x binop expr; -> xrval binop expr
6221   // x = expr Op x; - > expr binop xrval;
6222   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6223   if (!Res.first) {
6224     if (X.isGlobalReg()) {
6225       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6226       // 'xrval'.
6227       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6228     } else {
6229       // Perform compare-and-swap procedure.
6230       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6231     }
6232   }
6233   return Res;
6234 }
6235 
6236 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6237                                     llvm::AtomicOrdering AO, const Expr *X,
6238                                     const Expr *E, const Expr *UE,
6239                                     bool IsXLHSInRHSPart, SourceLocation Loc) {
6240   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6241          "Update expr in 'atomic update' must be a binary operator.");
6242   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6243   // Update expressions are allowed to have the following forms:
6244   // x binop= expr; -> xrval + expr;
6245   // x++, ++x -> xrval + 1;
6246   // x--, --x -> xrval - 1;
6247   // x = x binop expr; -> xrval binop expr
6248   // x = expr Op x; - > expr binop xrval;
6249   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6250   LValue XLValue = CGF.EmitLValue(X);
6251   RValue ExprRValue = CGF.EmitAnyExpr(E);
6252   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6253   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6254   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6255   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6256   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6257     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6258     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6259     return CGF.EmitAnyExpr(UE);
6260   };
6261   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6262       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6263   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6264   // OpenMP, 2.17.7, atomic Construct
6265   // If the write, update, or capture clause is specified and the release,
6266   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6267   // the atomic operation is also a release flush.
6268   switch (AO) {
6269   case llvm::AtomicOrdering::Release:
6270   case llvm::AtomicOrdering::AcquireRelease:
6271   case llvm::AtomicOrdering::SequentiallyConsistent:
6272     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6273                                          llvm::AtomicOrdering::Release);
6274     break;
6275   case llvm::AtomicOrdering::Acquire:
6276   case llvm::AtomicOrdering::Monotonic:
6277     break;
6278   case llvm::AtomicOrdering::NotAtomic:
6279   case llvm::AtomicOrdering::Unordered:
6280     llvm_unreachable("Unexpected ordering.");
6281   }
6282 }
6283 
6284 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6285                             QualType SourceType, QualType ResType,
6286                             SourceLocation Loc) {
6287   switch (CGF.getEvaluationKind(ResType)) {
6288   case TEK_Scalar:
6289     return RValue::get(
6290         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6291   case TEK_Complex: {
6292     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6293     return RValue::getComplex(Res.first, Res.second);
6294   }
6295   case TEK_Aggregate:
6296     break;
6297   }
6298   llvm_unreachable("Must be a scalar or complex.");
6299 }
6300 
6301 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6302                                      llvm::AtomicOrdering AO,
6303                                      bool IsPostfixUpdate, const Expr *V,
6304                                      const Expr *X, const Expr *E,
6305                                      const Expr *UE, bool IsXLHSInRHSPart,
6306                                      SourceLocation Loc) {
6307   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6308   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6309   RValue NewVVal;
6310   LValue VLValue = CGF.EmitLValue(V);
6311   LValue XLValue = CGF.EmitLValue(X);
6312   RValue ExprRValue = CGF.EmitAnyExpr(E);
6313   QualType NewVValType;
6314   if (UE) {
6315     // 'x' is updated with some additional value.
6316     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6317            "Update expr in 'atomic capture' must be a binary operator.");
6318     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6319     // Update expressions are allowed to have the following forms:
6320     // x binop= expr; -> xrval + expr;
6321     // x++, ++x -> xrval + 1;
6322     // x--, --x -> xrval - 1;
6323     // x = x binop expr; -> xrval binop expr
6324     // x = expr Op x; - > expr binop xrval;
6325     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6326     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6327     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6328     NewVValType = XRValExpr->getType();
6329     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6330     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6331                   IsPostfixUpdate](RValue XRValue) {
6332       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6333       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6334       RValue Res = CGF.EmitAnyExpr(UE);
6335       NewVVal = IsPostfixUpdate ? XRValue : Res;
6336       return Res;
6337     };
6338     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6339         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6340     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6341     if (Res.first) {
6342       // 'atomicrmw' instruction was generated.
6343       if (IsPostfixUpdate) {
6344         // Use old value from 'atomicrmw'.
6345         NewVVal = Res.second;
6346       } else {
6347         // 'atomicrmw' does not provide new value, so evaluate it using old
6348         // value of 'x'.
6349         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6350         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6351         NewVVal = CGF.EmitAnyExpr(UE);
6352       }
6353     }
6354   } else {
6355     // 'x' is simply rewritten with some 'expr'.
6356     NewVValType = X->getType().getNonReferenceType();
6357     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6358                                X->getType().getNonReferenceType(), Loc);
6359     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6360       NewVVal = XRValue;
6361       return ExprRValue;
6362     };
6363     // Try to perform atomicrmw xchg, otherwise simple exchange.
6364     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6365         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6366         Loc, Gen);
6367     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6368     if (Res.first) {
6369       // 'atomicrmw' instruction was generated.
6370       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6371     }
6372   }
6373   // Emit post-update store to 'v' of old/new 'x' value.
6374   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6375   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6376   // OpenMP 5.1 removes the required flush for capture clause.
6377   if (CGF.CGM.getLangOpts().OpenMP < 51) {
6378     // OpenMP, 2.17.7, atomic Construct
6379     // If the write, update, or capture clause is specified and the release,
6380     // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6381     // the atomic operation is also a release flush.
6382     // If the read or capture clause is specified and the acquire, acq_rel, or
6383     // seq_cst clause is specified then the strong flush on exit from the atomic
6384     // operation is also an acquire flush.
6385     switch (AO) {
6386     case llvm::AtomicOrdering::Release:
6387       CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6388                                            llvm::AtomicOrdering::Release);
6389       break;
6390     case llvm::AtomicOrdering::Acquire:
6391       CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6392                                            llvm::AtomicOrdering::Acquire);
6393       break;
6394     case llvm::AtomicOrdering::AcquireRelease:
6395     case llvm::AtomicOrdering::SequentiallyConsistent:
6396       CGF.CGM.getOpenMPRuntime().emitFlush(
6397           CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
6398       break;
6399     case llvm::AtomicOrdering::Monotonic:
6400       break;
6401     case llvm::AtomicOrdering::NotAtomic:
6402     case llvm::AtomicOrdering::Unordered:
6403       llvm_unreachable("Unexpected ordering.");
6404     }
6405   }
6406 }
6407 
6408 static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
6409                                      llvm::AtomicOrdering AO, const Expr *X,
6410                                      const Expr *V, const Expr *R,
6411                                      const Expr *E, const Expr *D,
6412                                      const Expr *CE, bool IsXBinopExpr,
6413                                      bool IsPostfixUpdate, bool IsFailOnly,
6414                                      SourceLocation Loc) {
6415   llvm::OpenMPIRBuilder &OMPBuilder =
6416       CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6417 
6418   OMPAtomicCompareOp Op;
6419   assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6420   switch (cast<BinaryOperator>(CE)->getOpcode()) {
6421   case BO_EQ:
6422     Op = OMPAtomicCompareOp::EQ;
6423     break;
6424   case BO_LT:
6425     Op = OMPAtomicCompareOp::MIN;
6426     break;
6427   case BO_GT:
6428     Op = OMPAtomicCompareOp::MAX;
6429     break;
6430   default:
6431     llvm_unreachable("unsupported atomic compare binary operator");
6432   }
6433 
6434   LValue XLVal = CGF.EmitLValue(X);
6435   Address XAddr = XLVal.getAddress(CGF);
6436 
6437   auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6438     if (X->getType() == E->getType())
6439       return CGF.EmitScalarExpr(E);
6440     const Expr *NewE = E->IgnoreImplicitAsWritten();
6441     llvm::Value *V = CGF.EmitScalarExpr(NewE);
6442     if (NewE->getType() == X->getType())
6443       return V;
6444     return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
6445   };
6446 
6447   llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6448   llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6449   if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
6450     EVal = CGF.Builder.CreateIntCast(
6451         CI, XLVal.getAddress(CGF).getElementType(),
6452         E->getType()->hasSignedIntegerRepresentation());
6453   if (DVal)
6454     if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
6455       DVal = CGF.Builder.CreateIntCast(
6456           CI, XLVal.getAddress(CGF).getElementType(),
6457           D->getType()->hasSignedIntegerRepresentation());
6458 
6459   llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6460       XAddr.getPointer(), XAddr.getElementType(),
6461       X->getType()->hasSignedIntegerRepresentation(),
6462       X->getType().isVolatileQualified()};
6463   llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6464   if (V) {
6465     LValue LV = CGF.EmitLValue(V);
6466     Address Addr = LV.getAddress(CGF);
6467     VOpVal = {Addr.getPointer(), Addr.getElementType(),
6468               V->getType()->hasSignedIntegerRepresentation(),
6469               V->getType().isVolatileQualified()};
6470   }
6471   if (R) {
6472     LValue LV = CGF.EmitLValue(R);
6473     Address Addr = LV.getAddress(CGF);
6474     ROpVal = {Addr.getPointer(), Addr.getElementType(),
6475               R->getType()->hasSignedIntegerRepresentation(),
6476               R->getType().isVolatileQualified()};
6477   }
6478 
6479   CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6480       CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6481       IsPostfixUpdate, IsFailOnly));
6482 }
6483 
6484 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6485                               llvm::AtomicOrdering AO, bool IsPostfixUpdate,
6486                               const Expr *X, const Expr *V, const Expr *R,
6487                               const Expr *E, const Expr *UE, const Expr *D,
6488                               const Expr *CE, bool IsXLHSInRHSPart,
6489                               bool IsFailOnly, SourceLocation Loc) {
6490   switch (Kind) {
6491   case OMPC_read:
6492     emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6493     break;
6494   case OMPC_write:
6495     emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6496     break;
6497   case OMPC_unknown:
6498   case OMPC_update:
6499     emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6500     break;
6501   case OMPC_capture:
6502     emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6503                              IsXLHSInRHSPart, Loc);
6504     break;
6505   case OMPC_compare: {
6506     emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart,
6507                              IsPostfixUpdate, IsFailOnly, Loc);
6508     break;
6509   }
6510   default:
6511     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6512   }
6513 }
6514 
6515 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6516   llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
6517   bool MemOrderingSpecified = false;
6518   if (S.getSingleClause<OMPSeqCstClause>()) {
6519     AO = llvm::AtomicOrdering::SequentiallyConsistent;
6520     MemOrderingSpecified = true;
6521   } else if (S.getSingleClause<OMPAcqRelClause>()) {
6522     AO = llvm::AtomicOrdering::AcquireRelease;
6523     MemOrderingSpecified = true;
6524   } else if (S.getSingleClause<OMPAcquireClause>()) {
6525     AO = llvm::AtomicOrdering::Acquire;
6526     MemOrderingSpecified = true;
6527   } else if (S.getSingleClause<OMPReleaseClause>()) {
6528     AO = llvm::AtomicOrdering::Release;
6529     MemOrderingSpecified = true;
6530   } else if (S.getSingleClause<OMPRelaxedClause>()) {
6531     AO = llvm::AtomicOrdering::Monotonic;
6532     MemOrderingSpecified = true;
6533   }
6534   llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6535   OpenMPClauseKind Kind = OMPC_unknown;
6536   for (const OMPClause *C : S.clauses()) {
6537     // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6538     // if it is first).
6539     OpenMPClauseKind K = C->getClauseKind();
6540     if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6541         K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6542       continue;
6543     Kind = K;
6544     KindsEncountered.insert(K);
6545   }
6546   // We just need to correct Kind here. No need to set a bool saying it is
6547   // actually compare capture because we can tell from whether V and R are
6548   // nullptr.
6549   if (KindsEncountered.contains(OMPC_compare) &&
6550       KindsEncountered.contains(OMPC_capture))
6551     Kind = OMPC_compare;
6552   if (!MemOrderingSpecified) {
6553     llvm::AtomicOrdering DefaultOrder =
6554         CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6555     if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6556         DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6557         (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6558          Kind == OMPC_capture)) {
6559       AO = DefaultOrder;
6560     } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6561       if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6562         AO = llvm::AtomicOrdering::Release;
6563       } else if (Kind == OMPC_read) {
6564         assert(Kind == OMPC_read && "Unexpected atomic kind.");
6565         AO = llvm::AtomicOrdering::Acquire;
6566       }
6567     }
6568   }
6569 
6570   LexicalScope Scope(*this, S.getSourceRange());
6571   EmitStopPoint(S.getAssociatedStmt());
6572   emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
6573                     S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(),
6574                     S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(),
6575                     S.getBeginLoc());
6576 }
6577 
6578 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6579                                          const OMPExecutableDirective &S,
6580                                          const RegionCodeGenTy &CodeGen) {
6581   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6582   CodeGenModule &CGM = CGF.CGM;
6583 
6584   // On device emit this construct as inlined code.
6585   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6586     OMPLexicalScope Scope(CGF, S, OMPD_target);
6587     CGM.getOpenMPRuntime().emitInlinedDirective(
6588         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6589           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6590         });
6591     return;
6592   }
6593 
6594   auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6595   llvm::Function *Fn = nullptr;
6596   llvm::Constant *FnID = nullptr;
6597 
6598   const Expr *IfCond = nullptr;
6599   // Check for the at most one if clause associated with the target region.
6600   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6601     if (C->getNameModifier() == OMPD_unknown ||
6602         C->getNameModifier() == OMPD_target) {
6603       IfCond = C->getCondition();
6604       break;
6605     }
6606   }
6607 
6608   // Check if we have any device clause associated with the directive.
6609   llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6610       nullptr, OMPC_DEVICE_unknown);
6611   if (auto *C = S.getSingleClause<OMPDeviceClause>())
6612     Device.setPointerAndInt(C->getDevice(), C->getModifier());
6613 
6614   // Check if we have an if clause whose conditional always evaluates to false
6615   // or if we do not have any targets specified. If so the target region is not
6616   // an offload entry point.
6617   bool IsOffloadEntry = true;
6618   if (IfCond) {
6619     bool Val;
6620     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6621       IsOffloadEntry = false;
6622   }
6623   if (CGM.getLangOpts().OMPTargetTriples.empty())
6624     IsOffloadEntry = false;
6625 
6626   if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6627     unsigned DiagID = CGM.getDiags().getCustomDiagID(
6628         DiagnosticsEngine::Error,
6629         "No offloading entry generated while offloading is mandatory.");
6630     CGM.getDiags().Report(DiagID);
6631   }
6632 
6633   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6634   StringRef ParentName;
6635   // In case we have Ctors/Dtors we use the complete type variant to produce
6636   // the mangling of the device outlined kernel.
6637   if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
6638     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
6639   else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
6640     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
6641   else
6642     ParentName =
6643         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6644 
6645   // Emit target region as a standalone region.
6646   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
6647                                                     IsOffloadEntry, CodeGen);
6648   OMPLexicalScope Scope(CGF, S, OMPD_task);
6649   auto &&SizeEmitter =
6650       [IsOffloadEntry](CodeGenFunction &CGF,
6651                        const OMPLoopDirective &D) -> llvm::Value * {
6652     if (IsOffloadEntry) {
6653       OMPLoopScope(CGF, D);
6654       // Emit calculation of the iterations count.
6655       llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
6656       NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6657                                                 /*isSigned=*/false);
6658       return NumIterations;
6659     }
6660     return nullptr;
6661   };
6662   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6663                                         SizeEmitter);
6664 }
6665 
6666 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6667                              PrePostActionTy &Action) {
6668   Action.Enter(CGF);
6669   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6670   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6671   CGF.EmitOMPPrivateClause(S, PrivateScope);
6672   (void)PrivateScope.Privatize();
6673   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6674     CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6675 
6676   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6677   CGF.EnsureInsertPoint();
6678 }
6679 
6680 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6681                                                   StringRef ParentName,
6682                                                   const OMPTargetDirective &S) {
6683   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6684     emitTargetRegion(CGF, S, Action);
6685   };
6686   llvm::Function *Fn;
6687   llvm::Constant *Addr;
6688   // Emit target region as a standalone region.
6689   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6690       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6691   assert(Fn && Addr && "Target device function emission failed.");
6692 }
6693 
6694 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6695   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6696     emitTargetRegion(CGF, S, Action);
6697   };
6698   emitCommonOMPTargetDirective(*this, S, CodeGen);
6699 }
6700 
6701 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6702                                         const OMPExecutableDirective &S,
6703                                         OpenMPDirectiveKind InnermostKind,
6704                                         const RegionCodeGenTy &CodeGen) {
6705   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6706   llvm::Function *OutlinedFn =
6707       CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6708           CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
6709           CodeGen);
6710 
6711   const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6712   const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6713   if (NT || TL) {
6714     const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6715     const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6716 
6717     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6718                                                   S.getBeginLoc());
6719   }
6720 
6721   OMPTeamsScope Scope(CGF, S);
6722   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6723   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6724   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6725                                            CapturedVars);
6726 }
6727 
6728 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6729   // Emit teams region as a standalone region.
6730   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6731     Action.Enter(CGF);
6732     OMPPrivateScope PrivateScope(CGF);
6733     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6734     CGF.EmitOMPPrivateClause(S, PrivateScope);
6735     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6736     (void)PrivateScope.Privatize();
6737     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6738     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6739   };
6740   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6741   emitPostUpdateForReductionClause(*this, S,
6742                                    [](CodeGenFunction &) { return nullptr; });
6743 }
6744 
6745 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6746                                   const OMPTargetTeamsDirective &S) {
6747   auto *CS = S.getCapturedStmt(OMPD_teams);
6748   Action.Enter(CGF);
6749   // Emit teams region as a standalone region.
6750   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6751     Action.Enter(CGF);
6752     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6753     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6754     CGF.EmitOMPPrivateClause(S, PrivateScope);
6755     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6756     (void)PrivateScope.Privatize();
6757     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6758       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6759     CGF.EmitStmt(CS->getCapturedStmt());
6760     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6761   };
6762   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6763   emitPostUpdateForReductionClause(CGF, S,
6764                                    [](CodeGenFunction &) { return nullptr; });
6765 }
6766 
6767 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6768     CodeGenModule &CGM, StringRef ParentName,
6769     const OMPTargetTeamsDirective &S) {
6770   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6771     emitTargetTeamsRegion(CGF, Action, S);
6772   };
6773   llvm::Function *Fn;
6774   llvm::Constant *Addr;
6775   // Emit target region as a standalone region.
6776   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6777       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6778   assert(Fn && Addr && "Target device function emission failed.");
6779 }
6780 
6781 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6782     const OMPTargetTeamsDirective &S) {
6783   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6784     emitTargetTeamsRegion(CGF, Action, S);
6785   };
6786   emitCommonOMPTargetDirective(*this, S, CodeGen);
6787 }
6788 
6789 static void
6790 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6791                                 const OMPTargetTeamsDistributeDirective &S) {
6792   Action.Enter(CGF);
6793   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6794     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6795   };
6796 
6797   // Emit teams region as a standalone region.
6798   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6799                                             PrePostActionTy &Action) {
6800     Action.Enter(CGF);
6801     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6802     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6803     (void)PrivateScope.Privatize();
6804     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6805                                                     CodeGenDistribute);
6806     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6807   };
6808   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6809   emitPostUpdateForReductionClause(CGF, S,
6810                                    [](CodeGenFunction &) { return nullptr; });
6811 }
6812 
6813 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6814     CodeGenModule &CGM, StringRef ParentName,
6815     const OMPTargetTeamsDistributeDirective &S) {
6816   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6817     emitTargetTeamsDistributeRegion(CGF, Action, S);
6818   };
6819   llvm::Function *Fn;
6820   llvm::Constant *Addr;
6821   // Emit target region as a standalone region.
6822   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6823       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6824   assert(Fn && Addr && "Target device function emission failed.");
6825 }
6826 
6827 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6828     const OMPTargetTeamsDistributeDirective &S) {
6829   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6830     emitTargetTeamsDistributeRegion(CGF, Action, S);
6831   };
6832   emitCommonOMPTargetDirective(*this, S, CodeGen);
6833 }
6834 
6835 static void emitTargetTeamsDistributeSimdRegion(
6836     CodeGenFunction &CGF, PrePostActionTy &Action,
6837     const OMPTargetTeamsDistributeSimdDirective &S) {
6838   Action.Enter(CGF);
6839   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6840     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6841   };
6842 
6843   // Emit teams region as a standalone region.
6844   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6845                                             PrePostActionTy &Action) {
6846     Action.Enter(CGF);
6847     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6848     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6849     (void)PrivateScope.Privatize();
6850     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6851                                                     CodeGenDistribute);
6852     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6853   };
6854   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6855   emitPostUpdateForReductionClause(CGF, S,
6856                                    [](CodeGenFunction &) { return nullptr; });
6857 }
6858 
6859 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6860     CodeGenModule &CGM, StringRef ParentName,
6861     const OMPTargetTeamsDistributeSimdDirective &S) {
6862   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6863     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6864   };
6865   llvm::Function *Fn;
6866   llvm::Constant *Addr;
6867   // Emit target region as a standalone region.
6868   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6869       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6870   assert(Fn && Addr && "Target device function emission failed.");
6871 }
6872 
6873 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6874     const OMPTargetTeamsDistributeSimdDirective &S) {
6875   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6876     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6877   };
6878   emitCommonOMPTargetDirective(*this, S, CodeGen);
6879 }
6880 
6881 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6882     const OMPTeamsDistributeDirective &S) {
6883 
6884   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6885     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6886   };
6887 
6888   // Emit teams region as a standalone region.
6889   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6890                                             PrePostActionTy &Action) {
6891     Action.Enter(CGF);
6892     OMPPrivateScope PrivateScope(CGF);
6893     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6894     (void)PrivateScope.Privatize();
6895     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6896                                                     CodeGenDistribute);
6897     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6898   };
6899   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6900   emitPostUpdateForReductionClause(*this, S,
6901                                    [](CodeGenFunction &) { return nullptr; });
6902 }
6903 
6904 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6905     const OMPTeamsDistributeSimdDirective &S) {
6906   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6907     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6908   };
6909 
6910   // Emit teams region as a standalone region.
6911   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6912                                             PrePostActionTy &Action) {
6913     Action.Enter(CGF);
6914     OMPPrivateScope PrivateScope(CGF);
6915     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6916     (void)PrivateScope.Privatize();
6917     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6918                                                     CodeGenDistribute);
6919     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6920   };
6921   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
6922   emitPostUpdateForReductionClause(*this, S,
6923                                    [](CodeGenFunction &) { return nullptr; });
6924 }
6925 
6926 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6927     const OMPTeamsDistributeParallelForDirective &S) {
6928   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6929     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6930                               S.getDistInc());
6931   };
6932 
6933   // Emit teams region as a standalone region.
6934   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6935                                             PrePostActionTy &Action) {
6936     Action.Enter(CGF);
6937     OMPPrivateScope PrivateScope(CGF);
6938     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6939     (void)PrivateScope.Privatize();
6940     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6941                                                     CodeGenDistribute);
6942     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6943   };
6944   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
6945   emitPostUpdateForReductionClause(*this, S,
6946                                    [](CodeGenFunction &) { return nullptr; });
6947 }
6948 
6949 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6950     const OMPTeamsDistributeParallelForSimdDirective &S) {
6951   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6952     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6953                               S.getDistInc());
6954   };
6955 
6956   // Emit teams region as a standalone region.
6957   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6958                                             PrePostActionTy &Action) {
6959     Action.Enter(CGF);
6960     OMPPrivateScope PrivateScope(CGF);
6961     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6962     (void)PrivateScope.Privatize();
6963     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6964         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6965     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6966   };
6967   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
6968                               CodeGen);
6969   emitPostUpdateForReductionClause(*this, S,
6970                                    [](CodeGenFunction &) { return nullptr; });
6971 }
6972 
6973 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
6974   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6975   llvm::Value *Device = nullptr;
6976   llvm::Value *NumDependences = nullptr;
6977   llvm::Value *DependenceList = nullptr;
6978 
6979   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6980     Device = EmitScalarExpr(C->getDevice());
6981 
6982   // Build list and emit dependences
6983   OMPTaskDataTy Data;
6984   buildDependences(S, Data);
6985   if (!Data.Dependences.empty()) {
6986     Address DependenciesArray = Address::invalid();
6987     std::tie(NumDependences, DependenciesArray) =
6988         CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
6989                                                 S.getBeginLoc());
6990     DependenceList = DependenciesArray.getPointer();
6991   }
6992   Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
6993 
6994   assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
6995                                      S.getSingleClause<OMPDestroyClause>() ||
6996                                      S.getSingleClause<OMPUseClause>())) &&
6997          "OMPNowaitClause clause is used separately in OMPInteropDirective.");
6998 
6999   if (const auto *C = S.getSingleClause<OMPInitClause>()) {
7000     llvm::Value *InteropvarPtr =
7001         EmitLValue(C->getInteropVar()).getPointer(*this);
7002     llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
7003     if (C->getIsTarget()) {
7004       InteropType = llvm::omp::OMPInteropType::Target;
7005     } else {
7006       assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
7007       InteropType = llvm::omp::OMPInteropType::TargetSync;
7008     }
7009     OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
7010                                     NumDependences, DependenceList,
7011                                     Data.HasNowaitClause);
7012   } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
7013     llvm::Value *InteropvarPtr =
7014         EmitLValue(C->getInteropVar()).getPointer(*this);
7015     OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
7016                                        NumDependences, DependenceList,
7017                                        Data.HasNowaitClause);
7018   } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
7019     llvm::Value *InteropvarPtr =
7020         EmitLValue(C->getInteropVar()).getPointer(*this);
7021     OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
7022                                    NumDependences, DependenceList,
7023                                    Data.HasNowaitClause);
7024   }
7025 }
7026 
7027 static void emitTargetTeamsDistributeParallelForRegion(
7028     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7029     PrePostActionTy &Action) {
7030   Action.Enter(CGF);
7031   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7032     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7033                               S.getDistInc());
7034   };
7035 
7036   // Emit teams region as a standalone region.
7037   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7038                                                  PrePostActionTy &Action) {
7039     Action.Enter(CGF);
7040     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7041     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7042     (void)PrivateScope.Privatize();
7043     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7044         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7045     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7046   };
7047 
7048   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7049                               CodeGenTeams);
7050   emitPostUpdateForReductionClause(CGF, S,
7051                                    [](CodeGenFunction &) { return nullptr; });
7052 }
7053 
7054 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7055     CodeGenModule &CGM, StringRef ParentName,
7056     const OMPTargetTeamsDistributeParallelForDirective &S) {
7057   // Emit SPMD target teams distribute parallel for region as a standalone
7058   // region.
7059   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7060     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7061   };
7062   llvm::Function *Fn;
7063   llvm::Constant *Addr;
7064   // Emit target region as a standalone region.
7065   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7066       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7067   assert(Fn && Addr && "Target device function emission failed.");
7068 }
7069 
7070 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7071     const OMPTargetTeamsDistributeParallelForDirective &S) {
7072   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7073     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7074   };
7075   emitCommonOMPTargetDirective(*this, S, CodeGen);
7076 }
7077 
7078 static void emitTargetTeamsDistributeParallelForSimdRegion(
7079     CodeGenFunction &CGF,
7080     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7081     PrePostActionTy &Action) {
7082   Action.Enter(CGF);
7083   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7084     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7085                               S.getDistInc());
7086   };
7087 
7088   // Emit teams region as a standalone region.
7089   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7090                                                  PrePostActionTy &Action) {
7091     Action.Enter(CGF);
7092     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7093     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7094     (void)PrivateScope.Privatize();
7095     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7096         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7097     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7098   };
7099 
7100   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
7101                               CodeGenTeams);
7102   emitPostUpdateForReductionClause(CGF, S,
7103                                    [](CodeGenFunction &) { return nullptr; });
7104 }
7105 
7106 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7107     CodeGenModule &CGM, StringRef ParentName,
7108     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7109   // Emit SPMD target teams distribute parallel for simd region as a standalone
7110   // region.
7111   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7112     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7113   };
7114   llvm::Function *Fn;
7115   llvm::Constant *Addr;
7116   // Emit target region as a standalone region.
7117   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7118       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7119   assert(Fn && Addr && "Target device function emission failed.");
7120 }
7121 
7122 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7123     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7124   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7125     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7126   };
7127   emitCommonOMPTargetDirective(*this, S, CodeGen);
7128 }
7129 
7130 void CodeGenFunction::EmitOMPCancellationPointDirective(
7131     const OMPCancellationPointDirective &S) {
7132   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7133                                                    S.getCancelRegion());
7134 }
7135 
7136 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7137   const Expr *IfCond = nullptr;
7138   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7139     if (C->getNameModifier() == OMPD_unknown ||
7140         C->getNameModifier() == OMPD_cancel) {
7141       IfCond = C->getCondition();
7142       break;
7143     }
7144   }
7145   if (CGM.getLangOpts().OpenMPIRBuilder) {
7146     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7147     // TODO: This check is necessary as we only generate `omp parallel` through
7148     // the OpenMPIRBuilder for now.
7149     if (S.getCancelRegion() == OMPD_parallel ||
7150         S.getCancelRegion() == OMPD_sections ||
7151         S.getCancelRegion() == OMPD_section) {
7152       llvm::Value *IfCondition = nullptr;
7153       if (IfCond)
7154         IfCondition = EmitScalarExpr(IfCond,
7155                                      /*IgnoreResultAssign=*/true);
7156       return Builder.restoreIP(
7157           OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7158     }
7159   }
7160 
7161   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7162                                         S.getCancelRegion());
7163 }
7164 
7165 CodeGenFunction::JumpDest
7166 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7167   if (Kind == OMPD_parallel || Kind == OMPD_task ||
7168       Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7169       Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7170     return ReturnBlock;
7171   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7172          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7173          Kind == OMPD_distribute_parallel_for ||
7174          Kind == OMPD_target_parallel_for ||
7175          Kind == OMPD_teams_distribute_parallel_for ||
7176          Kind == OMPD_target_teams_distribute_parallel_for);
7177   return OMPCancelStack.getExitBlock();
7178 }
7179 
7180 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7181     const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7182     const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7183         CaptureDeviceAddrMap) {
7184   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7185   for (const Expr *OrigVarIt : C.varlists()) {
7186     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7187     if (!Processed.insert(OrigVD).second)
7188       continue;
7189 
7190     // In order to identify the right initializer we need to match the
7191     // declaration used by the mapping logic. In some cases we may get
7192     // OMPCapturedExprDecl that refers to the original declaration.
7193     const ValueDecl *MatchingVD = OrigVD;
7194     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7195       // OMPCapturedExprDecl are used to privative fields of the current
7196       // structure.
7197       const auto *ME = cast<MemberExpr>(OED->getInit());
7198       assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7199              "Base should be the current struct!");
7200       MatchingVD = ME->getMemberDecl();
7201     }
7202 
7203     // If we don't have information about the current list item, move on to
7204     // the next one.
7205     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7206     if (InitAddrIt == CaptureDeviceAddrMap.end())
7207       continue;
7208 
7209     llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7210 
7211     // Return the address of the private variable.
7212     bool IsRegistered = PrivateScope.addPrivate(
7213         OrigVD,
7214         Address(InitAddrIt->second, Ty,
7215                 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7216     assert(IsRegistered && "firstprivate var already registered as private");
7217     // Silence the warning about unused variable.
7218     (void)IsRegistered;
7219   }
7220 }
7221 
7222 static const VarDecl *getBaseDecl(const Expr *Ref) {
7223   const Expr *Base = Ref->IgnoreParenImpCasts();
7224   while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
7225     Base = OASE->getBase()->IgnoreParenImpCasts();
7226   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7227     Base = ASE->getBase()->IgnoreParenImpCasts();
7228   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7229 }
7230 
7231 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7232     const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7233     const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7234         CaptureDeviceAddrMap) {
7235   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7236   for (const Expr *Ref : C.varlists()) {
7237     const VarDecl *OrigVD = getBaseDecl(Ref);
7238     if (!Processed.insert(OrigVD).second)
7239       continue;
7240     // In order to identify the right initializer we need to match the
7241     // declaration used by the mapping logic. In some cases we may get
7242     // OMPCapturedExprDecl that refers to the original declaration.
7243     const ValueDecl *MatchingVD = OrigVD;
7244     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7245       // OMPCapturedExprDecl are used to privative fields of the current
7246       // structure.
7247       const auto *ME = cast<MemberExpr>(OED->getInit());
7248       assert(isa<CXXThisExpr>(ME->getBase()) &&
7249              "Base should be the current struct!");
7250       MatchingVD = ME->getMemberDecl();
7251     }
7252 
7253     // If we don't have information about the current list item, move on to
7254     // the next one.
7255     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7256     if (InitAddrIt == CaptureDeviceAddrMap.end())
7257       continue;
7258 
7259     llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7260 
7261     Address PrivAddr =
7262         Address(InitAddrIt->second, Ty,
7263                 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
7264     // For declrefs and variable length array need to load the pointer for
7265     // correct mapping, since the pointer to the data was passed to the runtime.
7266     if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7267         MatchingVD->getType()->isArrayType()) {
7268       QualType PtrTy = getContext().getPointerType(
7269           OrigVD->getType().getNonReferenceType());
7270       PrivAddr =
7271           EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
7272                             PtrTy->castAs<PointerType>());
7273     }
7274 
7275     (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7276   }
7277 }
7278 
// Generate the instructions for '#pragma omp target data' directive.
//
// The region body is always emitted as an inlined OMPD_target_data directive.
// When no offload target triples are configured, only that body is emitted;
// otherwise the runtime's emitTargetDataCalls is invoked with the optional
// 'if' and 'device' expressions and drives the region codegen.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    // Refers to the flag owned by the enclosing function.
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    // Entering the action requests privatization by raising the flag.
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  // Outer region codegen: wraps the privatization-aware body in an inlined
  // 'target data' directive.
  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement captured by the innermost captured region.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag so that only the Enter call below can raise it.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        // Emit all instances of the use_device_addr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        // CaptureRegion stays empty in that case and is set to OMPD_unknown
        // otherwise; it is forwarded to the lexical scope below.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  // Note that PrivAction is not installed on this path.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
7391 
7392 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7393     const OMPTargetEnterDataDirective &S) {
7394   // If we don't have target devices, don't bother emitting the data mapping
7395   // code.
7396   if (CGM.getLangOpts().OMPTargetTriples.empty())
7397     return;
7398 
7399   // Check if we have any if clause associated with the directive.
7400   const Expr *IfCond = nullptr;
7401   if (const auto *C = S.getSingleClause<OMPIfClause>())
7402     IfCond = C->getCondition();
7403 
7404   // Check if we have any device clause associated with the directive.
7405   const Expr *Device = nullptr;
7406   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7407     Device = C->getDevice();
7408 
7409   OMPLexicalScope Scope(*this, S, OMPD_task);
7410   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7411 }
7412 
7413 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7414     const OMPTargetExitDataDirective &S) {
7415   // If we don't have target devices, don't bother emitting the data mapping
7416   // code.
7417   if (CGM.getLangOpts().OMPTargetTriples.empty())
7418     return;
7419 
7420   // Check if we have any if clause associated with the directive.
7421   const Expr *IfCond = nullptr;
7422   if (const auto *C = S.getSingleClause<OMPIfClause>())
7423     IfCond = C->getCondition();
7424 
7425   // Check if we have any device clause associated with the directive.
7426   const Expr *Device = nullptr;
7427   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7428     Device = C->getDevice();
7429 
7430   OMPLexicalScope Scope(*this, S, OMPD_task);
7431   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7432 }
7433 
7434 static void emitTargetParallelRegion(CodeGenFunction &CGF,
7435                                      const OMPTargetParallelDirective &S,
7436                                      PrePostActionTy &Action) {
7437   // Get the captured statement associated with the 'parallel' region.
7438   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
7439   Action.Enter(CGF);
7440   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7441     Action.Enter(CGF);
7442     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7443     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7444     CGF.EmitOMPPrivateClause(S, PrivateScope);
7445     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7446     (void)PrivateScope.Privatize();
7447     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7448       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
7449     // TODO: Add support for clauses.
7450     CGF.EmitStmt(CS->getCapturedStmt());
7451     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
7452   };
7453   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
7454                                  emitEmptyBoundParameters);
7455   emitPostUpdateForReductionClause(CGF, S,
7456                                    [](CodeGenFunction &) { return nullptr; });
7457 }
7458 
7459 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7460     CodeGenModule &CGM, StringRef ParentName,
7461     const OMPTargetParallelDirective &S) {
7462   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7463     emitTargetParallelRegion(CGF, S, Action);
7464   };
7465   llvm::Function *Fn;
7466   llvm::Constant *Addr;
7467   // Emit target region as a standalone region.
7468   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7469       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7470   assert(Fn && Addr && "Target device function emission failed.");
7471 }
7472 
7473 void CodeGenFunction::EmitOMPTargetParallelDirective(
7474     const OMPTargetParallelDirective &S) {
7475   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7476     emitTargetParallelRegion(CGF, S, Action);
7477   };
7478   emitCommonOMPTargetDirective(*this, S, CodeGen);
7479 }
7480 
7481 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7482                                         const OMPTargetParallelForDirective &S,
7483                                         PrePostActionTy &Action) {
7484   Action.Enter(CGF);
7485   // Emit directive as a combined directive that consists of two implicit
7486   // directives: 'parallel' with 'for' directive.
7487   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7488     Action.Enter(CGF);
7489     CodeGenFunction::OMPCancelStackRAII CancelRegion(
7490         CGF, OMPD_target_parallel_for, S.hasCancel());
7491     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7492                                emitDispatchForLoopBounds);
7493   };
7494   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7495                                  emitEmptyBoundParameters);
7496 }
7497 
7498 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7499     CodeGenModule &CGM, StringRef ParentName,
7500     const OMPTargetParallelForDirective &S) {
7501   // Emit SPMD target parallel for region as a standalone region.
7502   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7503     emitTargetParallelForRegion(CGF, S, Action);
7504   };
7505   llvm::Function *Fn;
7506   llvm::Constant *Addr;
7507   // Emit target region as a standalone region.
7508   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7509       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7510   assert(Fn && Addr && "Target device function emission failed.");
7511 }
7512 
7513 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7514     const OMPTargetParallelForDirective &S) {
7515   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7516     emitTargetParallelForRegion(CGF, S, Action);
7517   };
7518   emitCommonOMPTargetDirective(*this, S, CodeGen);
7519 }
7520 
7521 static void
7522 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7523                                 const OMPTargetParallelForSimdDirective &S,
7524                                 PrePostActionTy &Action) {
7525   Action.Enter(CGF);
7526   // Emit directive as a combined directive that consists of two implicit
7527   // directives: 'parallel' with 'for' directive.
7528   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7529     Action.Enter(CGF);
7530     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7531                                emitDispatchForLoopBounds);
7532   };
7533   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7534                                  emitEmptyBoundParameters);
7535 }
7536 
7537 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7538     CodeGenModule &CGM, StringRef ParentName,
7539     const OMPTargetParallelForSimdDirective &S) {
7540   // Emit SPMD target parallel for region as a standalone region.
7541   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7542     emitTargetParallelForSimdRegion(CGF, S, Action);
7543   };
7544   llvm::Function *Fn;
7545   llvm::Constant *Addr;
7546   // Emit target region as a standalone region.
7547   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7548       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7549   assert(Fn && Addr && "Target device function emission failed.");
7550 }
7551 
7552 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7553     const OMPTargetParallelForSimdDirective &S) {
7554   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7555     emitTargetParallelForSimdRegion(CGF, S, Action);
7556   };
7557   emitCommonOMPTargetDirective(*this, S, CodeGen);
7558 }
7559 
7560 /// Emit a helper variable and return corresponding lvalue.
7561 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7562                      const ImplicitParamDecl *PVD,
7563                      CodeGenFunction::OMPPrivateScope &Privates) {
7564   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7565   Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7566 }
7567 
7568 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7569   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7570   // Emit outlined function for task construct.
7571   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7572   Address CapturedStruct = Address::invalid();
7573   {
7574     OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7575     CapturedStruct = GenerateCapturedStmtArgument(*CS);
7576   }
7577   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7578   const Expr *IfCond = nullptr;
7579   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7580     if (C->getNameModifier() == OMPD_unknown ||
7581         C->getNameModifier() == OMPD_taskloop) {
7582       IfCond = C->getCondition();
7583       break;
7584     }
7585   }
7586 
7587   OMPTaskDataTy Data;
7588   // Check if taskloop must be emitted without taskgroup.
7589   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7590   // TODO: Check if we should emit tied or untied task.
7591   Data.Tied = true;
7592   // Set scheduling for taskloop
7593   if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7594     // grainsize clause
7595     Data.Schedule.setInt(/*IntVal=*/false);
7596     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7597   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7598     // num_tasks clause
7599     Data.Schedule.setInt(/*IntVal=*/true);
7600     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7601   }
7602 
7603   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7604     // if (PreCond) {
7605     //   for (IV in 0..LastIteration) BODY;
7606     //   <Final counter/linear vars updates>;
7607     // }
7608     //
7609 
7610     // Emit: if (PreCond) - begin.
7611     // If the condition constant folds and can be elided, avoid emitting the
7612     // whole loop.
7613     bool CondConstant;
7614     llvm::BasicBlock *ContBlock = nullptr;
7615     OMPLoopScope PreInitScope(CGF, S);
7616     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7617       if (!CondConstant)
7618         return;
7619     } else {
7620       llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7621       ContBlock = CGF.createBasicBlock("taskloop.if.end");
7622       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7623                   CGF.getProfileCount(&S));
7624       CGF.EmitBlock(ThenBlock);
7625       CGF.incrementProfileCounter(&S);
7626     }
7627 
7628     (void)CGF.EmitOMPLinearClauseInit(S);
7629 
7630     OMPPrivateScope LoopScope(CGF);
7631     // Emit helper vars inits.
7632     enum { LowerBound = 5, UpperBound, Stride, LastIter };
7633     auto *I = CS->getCapturedDecl()->param_begin();
7634     auto *LBP = std::next(I, LowerBound);
7635     auto *UBP = std::next(I, UpperBound);
7636     auto *STP = std::next(I, Stride);
7637     auto *LIP = std::next(I, LastIter);
7638     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7639              LoopScope);
7640     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7641              LoopScope);
7642     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7643     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7644              LoopScope);
7645     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7646     CGF.EmitOMPLinearClause(S, LoopScope);
7647     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7648     (void)LoopScope.Privatize();
7649     // Emit the loop iteration variable.
7650     const Expr *IVExpr = S.getIterationVariable();
7651     const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7652     CGF.EmitVarDecl(*IVDecl);
7653     CGF.EmitIgnoredExpr(S.getInit());
7654 
7655     // Emit the iterations count variable.
7656     // If it is not a variable, Sema decided to calculate iterations count on
7657     // each iteration (e.g., it is foldable into a constant).
7658     if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7659       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7660       // Emit calculation of the iterations count.
7661       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7662     }
7663 
7664     {
7665       OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7666       emitCommonSimdLoop(
7667           CGF, S,
7668           [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7669             if (isOpenMPSimdDirective(S.getDirectiveKind()))
7670               CGF.EmitOMPSimdInit(S);
7671           },
7672           [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7673             CGF.EmitOMPInnerLoop(
7674                 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
7675                 [&S](CodeGenFunction &CGF) {
7676                   emitOMPLoopBodyWithStopPoint(CGF, S,
7677                                                CodeGenFunction::JumpDest());
7678                 },
7679                 [](CodeGenFunction &) {});
7680           });
7681     }
7682     // Emit: if (PreCond) - end.
7683     if (ContBlock) {
7684       CGF.EmitBranch(ContBlock);
7685       CGF.EmitBlock(ContBlock, true);
7686     }
7687     // Emit final copy of the lastprivate variables if IsLastIter != 0.
7688     if (HasLastprivateClause) {
7689       CGF.EmitOMPLastprivateClauseFinal(
7690           S, isOpenMPSimdDirective(S.getDirectiveKind()),
7691           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
7692               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7693               (*LIP)->getType(), S.getBeginLoc())));
7694     }
7695     LoopScope.restoreMap();
7696     CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
7697       return CGF.Builder.CreateIsNotNull(
7698           CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7699                                (*LIP)->getType(), S.getBeginLoc()));
7700     });
7701   };
7702   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7703                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7704                             const OMPTaskDataTy &Data) {
7705     auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7706                       &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7707       OMPLoopScope PreInitScope(CGF, S);
7708       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
7709                                                   OutlinedFn, SharedsTy,
7710                                                   CapturedStruct, IfCond, Data);
7711     };
7712     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
7713                                                     CodeGen);
7714   };
7715   if (Data.Nogroup) {
7716     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
7717   } else {
7718     CGM.getOpenMPRuntime().emitTaskgroupRegion(
7719         *this,
7720         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
7721                                         PrePostActionTy &Action) {
7722           Action.Enter(CGF);
7723           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
7724                                         Data);
7725         },
7726         S.getBeginLoc());
7727   }
7728 }
7729 
7730 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7731   auto LPCRegion =
7732       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7733   EmitOMPTaskLoopBasedDirective(S);
7734 }
7735 
7736 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7737     const OMPTaskLoopSimdDirective &S) {
7738   auto LPCRegion =
7739       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7740   OMPLexicalScope Scope(*this, S);
7741   EmitOMPTaskLoopBasedDirective(S);
7742 }
7743 
7744 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7745     const OMPMasterTaskLoopDirective &S) {
7746   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7747     Action.Enter(CGF);
7748     EmitOMPTaskLoopBasedDirective(S);
7749   };
7750   auto LPCRegion =
7751       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7752   OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
7753   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7754 }
7755 
7756 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7757     const OMPMasterTaskLoopSimdDirective &S) {
7758   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7759     Action.Enter(CGF);
7760     EmitOMPTaskLoopBasedDirective(S);
7761   };
7762   auto LPCRegion =
7763       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7764   OMPLexicalScope Scope(*this, S);
7765   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7766 }
7767 
7768 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7769     const OMPParallelMasterTaskLoopDirective &S) {
7770   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7771     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7772                                   PrePostActionTy &Action) {
7773       Action.Enter(CGF);
7774       CGF.EmitOMPTaskLoopBasedDirective(S);
7775     };
7776     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7777     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7778                                             S.getBeginLoc());
7779   };
7780   auto LPCRegion =
7781       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7782   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7783                                  emitEmptyBoundParameters);
7784 }
7785 
7786 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7787     const OMPParallelMasterTaskLoopSimdDirective &S) {
7788   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7789     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7790                                   PrePostActionTy &Action) {
7791       Action.Enter(CGF);
7792       CGF.EmitOMPTaskLoopBasedDirective(S);
7793     };
7794     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7795     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7796                                             S.getBeginLoc());
7797   };
7798   auto LPCRegion =
7799       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7800   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7801                                  emitEmptyBoundParameters);
7802 }
7803 
7804 // Generate the instructions for '#pragma omp target update' directive.
7805 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7806     const OMPTargetUpdateDirective &S) {
7807   // If we don't have target devices, don't bother emitting the data mapping
7808   // code.
7809   if (CGM.getLangOpts().OMPTargetTriples.empty())
7810     return;
7811 
7812   // Check if we have any if clause associated with the directive.
7813   const Expr *IfCond = nullptr;
7814   if (const auto *C = S.getSingleClause<OMPIfClause>())
7815     IfCond = C->getCondition();
7816 
7817   // Check if we have any device clause associated with the directive.
7818   const Expr *Device = nullptr;
7819   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7820     Device = C->getDevice();
7821 
7822   OMPLexicalScope Scope(*this, S, OMPD_task);
7823   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7824 }
7825 
7826 void CodeGenFunction::EmitOMPGenericLoopDirective(
7827     const OMPGenericLoopDirective &S) {
7828   // Unimplemented, just inline the underlying statement for now.
7829   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7830     // Emit the loop iteration variable.
7831     const Stmt *CS =
7832         cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
7833     const auto *ForS = dyn_cast<ForStmt>(CS);
7834     if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7835       OMPPrivateScope LoopScope(CGF);
7836       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7837       (void)LoopScope.Privatize();
7838       CGF.EmitStmt(CS);
7839       LoopScope.restoreMap();
7840     } else {
7841       CGF.EmitStmt(CS);
7842     }
7843   };
7844   OMPLexicalScope Scope(*this, S, OMPD_unknown);
7845   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7846 }
7847 
7848 void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
7849     const OMPLoopDirective &S) {
7850   // Emit combined directive as if its consituent constructs are 'parallel'
7851   // and 'for'.
7852   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7853     Action.Enter(CGF);
7854     emitOMPCopyinClause(CGF, S);
7855     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
7856   };
7857   {
7858     auto LPCRegion =
7859         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7860     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
7861                                    emitEmptyBoundParameters);
7862   }
7863   // Check for outer lastprivate conditional update.
7864   checkForLastprivateConditionalUpdate(*this, S);
7865 }
7866 
7867 void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
7868     const OMPTeamsGenericLoopDirective &S) {
7869   // To be consistent with current behavior of 'target teams loop', emit
7870   // 'teams loop' as if its constituent constructs are 'distribute,
7871   // 'parallel, and 'for'.
7872   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7873     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7874                               S.getDistInc());
7875   };
7876 
7877   // Emit teams region as a standalone region.
7878   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7879                                             PrePostActionTy &Action) {
7880     Action.Enter(CGF);
7881     OMPPrivateScope PrivateScope(CGF);
7882     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7883     (void)PrivateScope.Privatize();
7884     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7885                                                     CodeGenDistribute);
7886     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7887   };
7888   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
7889   emitPostUpdateForReductionClause(*this, S,
7890                                    [](CodeGenFunction &) { return nullptr; });
7891 }
7892 
7893 static void
7894 emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
7895                                  const OMPTargetTeamsGenericLoopDirective &S,
7896                                  PrePostActionTy &Action) {
7897   Action.Enter(CGF);
7898   // Emit 'teams loop' as if its constituent constructs are 'distribute,
7899   // 'parallel, and 'for'.
7900   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7901     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7902                               S.getDistInc());
7903   };
7904 
7905   // Emit teams region as a standalone region.
7906   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7907                                                  PrePostActionTy &Action) {
7908     Action.Enter(CGF);
7909     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7910     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7911     (void)PrivateScope.Privatize();
7912     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7913         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7914     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7915   };
7916 
7917   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7918                               CodeGenTeams);
7919   emitPostUpdateForReductionClause(CGF, S,
7920                                    [](CodeGenFunction &) { return nullptr; });
7921 }
7922 
7923 /// Emit combined directive 'target teams loop' as if its constituent
7924 /// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
7925 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
7926     const OMPTargetTeamsGenericLoopDirective &S) {
7927   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7928     emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7929   };
7930   emitCommonOMPTargetDirective(*this, S, CodeGen);
7931 }
7932 
7933 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
7934     CodeGenModule &CGM, StringRef ParentName,
7935     const OMPTargetTeamsGenericLoopDirective &S) {
7936   // Emit SPMD target parallel loop region as a standalone region.
7937   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7938     emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7939   };
7940   llvm::Function *Fn;
7941   llvm::Constant *Addr;
7942   // Emit target region as a standalone region.
7943   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7944       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7945   assert(Fn && Addr &&
7946          "Target device function emission failed for 'target teams loop'.");
7947 }
7948 
7949 static void emitTargetParallelGenericLoopRegion(
7950     CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
7951     PrePostActionTy &Action) {
7952   Action.Enter(CGF);
7953   // Emit as 'parallel for'.
7954   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7955     Action.Enter(CGF);
7956     CodeGenFunction::OMPCancelStackRAII CancelRegion(
7957         CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
7958     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7959                                emitDispatchForLoopBounds);
7960   };
7961   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7962                                  emitEmptyBoundParameters);
7963 }
7964 
7965 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
7966     CodeGenModule &CGM, StringRef ParentName,
7967     const OMPTargetParallelGenericLoopDirective &S) {
7968   // Emit target parallel loop region as a standalone region.
7969   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7970     emitTargetParallelGenericLoopRegion(CGF, S, Action);
7971   };
7972   llvm::Function *Fn;
7973   llvm::Constant *Addr;
7974   // Emit target region as a standalone region.
7975   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7976       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7977   assert(Fn && Addr && "Target device function emission failed.");
7978 }
7979 
7980 /// Emit combined directive 'target parallel loop' as if its constituent
7981 /// constructs are 'target', 'parallel', and 'for'.
7982 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
7983     const OMPTargetParallelGenericLoopDirective &S) {
7984   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7985     emitTargetParallelGenericLoopRegion(CGF, S, Action);
7986   };
7987   emitCommonOMPTargetDirective(*this, S, CodeGen);
7988 }
7989 
7990 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
7991     const OMPExecutableDirective &D) {
7992   if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
7993     EmitOMPScanDirective(*SD);
7994     return;
7995   }
7996   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
7997     return;
7998   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
7999     OMPPrivateScope GlobalsScope(CGF);
8000     if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
8001       // Capture global firstprivates to avoid crash.
8002       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8003         for (const Expr *Ref : C->varlists()) {
8004           const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8005           if (!DRE)
8006             continue;
8007           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8008           if (!VD || VD->hasLocalStorage())
8009             continue;
8010           if (!CGF.LocalDeclMap.count(VD)) {
8011             LValue GlobLVal = CGF.EmitLValue(Ref);
8012             GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8013           }
8014         }
8015       }
8016     }
8017     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8018       (void)GlobalsScope.Privatize();
8019       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8020       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
8021     } else {
8022       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
8023         for (const Expr *E : LD->counters()) {
8024           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8025           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8026             LValue GlobLVal = CGF.EmitLValue(E);
8027             GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8028           }
8029           if (isa<OMPCapturedExprDecl>(VD)) {
8030             // Emit only those that were not explicitly referenced in clauses.
8031             if (!CGF.LocalDeclMap.count(VD))
8032               CGF.EmitVarDecl(*VD);
8033           }
8034         }
8035         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8036           if (!C->getNumForLoops())
8037             continue;
8038           for (unsigned I = LD->getLoopsNumber(),
8039                         E = C->getLoopNumIterations().size();
8040                I < E; ++I) {
8041             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8042                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8043               // Emit only those that were not explicitly referenced in clauses.
8044               if (!CGF.LocalDeclMap.count(VD))
8045                 CGF.EmitVarDecl(*VD);
8046             }
8047           }
8048         }
8049       }
8050       (void)GlobalsScope.Privatize();
8051       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8052     }
8053   };
8054   if (D.getDirectiveKind() == OMPD_atomic ||
8055       D.getDirectiveKind() == OMPD_critical ||
8056       D.getDirectiveKind() == OMPD_section ||
8057       D.getDirectiveKind() == OMPD_master ||
8058       D.getDirectiveKind() == OMPD_masked) {
8059     EmitStmt(D.getAssociatedStmt());
8060   } else {
8061     auto LPCRegion =
8062         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
8063     OMPSimdLexicalScope Scope(*this, D);
8064     CGM.getOpenMPRuntime().emitInlinedDirective(
8065         *this,
8066         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8067                                                     : D.getDirectiveKind(),
8068         CodeGen);
8069   }
8070   // Check for outer lastprivate conditional update.
8071   checkForLastprivateConditionalUpdate(*this, D);
8072 }
8073