xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/BinaryFormat/Dwarf.h"
30 #include "llvm/Frontend/OpenMP/OMPConstants.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Metadata.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Debug.h"
39 #include <optional>
40 using namespace clang;
41 using namespace CodeGen;
42 using namespace llvm::omp;
43 
44 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
45 
46 static const VarDecl *getBaseDecl(const Expr *Ref);
47 
48 namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emit the pre-init statements attached to the directive's clauses so that
  /// the temporaries they declare are live before the construct is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit: allocate storage and register cleanups, but
              // deliberately skip emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current lambda, the current
  /// captured statement, or an enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    // Without a captured region there is nothing to privatize.
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    // Remap every captured variable to the address of its captured lvalue so
    // references inside the inlined region resolve to the shared copies.
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        // Note: EmitLValue must run before Privatize() so the address is
        // computed against the pre-privatization mapping.
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
109 
110 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
111 /// for captured expressions.
112 class OMPParallelScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)113   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
114     OpenMPDirectiveKind Kind = S.getDirectiveKind();
115     return !(isOpenMPTargetExecutionDirective(Kind) ||
116              isOpenMPLoopBoundSharingDirective(Kind)) &&
117            isOpenMPParallelDirective(Kind);
118   }
119 
120 public:
OMPParallelScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)121   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
122       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
123                         EmitPreInitStmt(S)) {}
124 };
125 
126 /// Lexical scope for OpenMP teams construct, that handles correct codegen
127 /// for captured expressions.
128 class OMPTeamsScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)129   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
130     OpenMPDirectiveKind Kind = S.getDirectiveKind();
131     return !isOpenMPTargetExecutionDirective(Kind) &&
132            isOpenMPTeamsDirective(Kind);
133   }
134 
135 public:
OMPTeamsScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)136   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
137       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
138                         EmitPreInitStmt(S)) {}
139 };
140 
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emit everything that must precede the loop nest: temporaries for the
  /// loop counters, undef placeholders for private variables, init/__range/
  /// __end variables of C++ range-for loops, and the directive's pre-init
  /// statements.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      // Map each loop counter to a fresh temporary for the duration of the
      // pre-init emission.
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements and
      // declarations. Since declarations must be visible to the following
      // statements that they initialize, unpack the CompoundStmt they are
      // nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
        // here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    // Drop the temporary counter/private mappings; the real variables are
    // emitted later when the loop itself is generated.
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
228 
/// Lexical scope used when OpenMP directives are emitted in simd-only mode:
/// emits clause pre-inits, privates, taskgroup reduction refs, and remaps
/// captured variables to their shared lvalues.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current lambda, the current
  /// captured statement, or an enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    // Emit clause pre-init declarations, plus the captured-expression decls
    // referenced by use_device_ptr/use_device_addr clauses.
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit: allocate storage and cleanups but skip the
              // initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    // Private-clause copies are emitted for non-simd directives only.
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    // Walk the chain of nested CapturedStmts and remap every captured
    // variable to the address of its shared lvalue.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
309 
310 } // namespace
311 
312 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
313                                          const OMPExecutableDirective &S,
314                                          const RegionCodeGenTy &CodeGen);
315 
EmitOMPSharedLValue(const Expr * E)316 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
317   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
318     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
319       OrigVD = OrigVD->getCanonicalDecl();
320       bool IsCaptured =
321           LambdaCaptureFields.lookup(OrigVD) ||
322           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
323           (isa_and_nonnull<BlockDecl>(CurCodeDecl));
324       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
325                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
326       return EmitLValue(&DRE);
327     }
328   }
329   return EmitLValue(E);
330 }
331 
getTypeSize(QualType Ty)332 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
333   ASTContext &C = getContext();
334   llvm::Value *Size = nullptr;
335   auto SizeInChars = C.getTypeSizeInChars(Ty);
336   if (SizeInChars.isZero()) {
337     // getTypeSizeInChars() returns 0 for a VLA.
338     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
339       VlaSizePair VlaSize = getVLASize(VAT);
340       Ty = VlaSize.Type;
341       Size =
342           Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
343     }
344     SizeInChars = C.getTypeSizeInChars(Ty);
345     if (SizeInChars.isZero())
346       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
347     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
348   }
349   return CGM.getSize(SizeInChars);
350 }
351 
// Collect the values to pass to the outlined function for each capture of
// \p S: VLA dimension values, 'this', by-copy scalars (laundered through a
// uintptr temporary when not pointer-typed), and addresses for by-reference
// captures.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  // Walk the capture record fields, the capture list, and the capture inits
  // in lockstep; the three sequences are parallel by construction.
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA dimension value.
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}
398 
castValueFromUintptr(CodeGenFunction & CGF,SourceLocation Loc,QualType DstType,StringRef Name,LValue AddrLV)399 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
400                                     QualType DstType, StringRef Name,
401                                     LValue AddrLV) {
402   ASTContext &Ctx = CGF.getContext();
403 
404   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
405       AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
406       Ctx.getPointerType(DstType), Loc);
407   // FIXME: should the pointee type (DstType) be passed?
408   Address TmpAddr =
409       CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
410   return TmpAddr;
411 }
412 
getCanonicalParamType(ASTContext & C,QualType T)413 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
414   if (T->isLValueReferenceType())
415     return C.getLValueReferenceType(
416         getCanonicalParamType(C, T.getNonReferenceType()),
417         /*SpelledAsLValue=*/false);
418   if (T->isPointerType())
419     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
420   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
421     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
422       return getCanonicalParamType(C, VLA->getElementType());
423     if (!A->isVariablyModifiedType())
424       return C.getCanonicalType(T);
425   }
426   return C.getCanonicalParamType(T);
427 }
428 
namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from  UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        // Registering only casted arguments is meaningful solely when casts
        // are performed at all, hence the conjunction.
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace
452 
/// Build the outlined function for a captured statement and emit its
/// prologue: the captured decl's context parameters are preserved, each
/// capture-record field becomes an explicit argument (cast through uintptr_t
/// when \p FO requires it), and every argument is mapped back in
/// \p LocalAddrs / \p VLASizes to the variable, VLA size expression, or
/// 'this' value it carries. Returns the newly created llvm::Function with
/// its body started (StartFunction called, body not yet emitted).
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  // Parameters preceding the context parameter are copied verbatim.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    // Debug (non-casted) variant: create a synthetic FunctionDecl to parent
    // real ParmVarDecls so debug info gets proper parameter locations.
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      // Thread-local captures get a dedicated implicit-parameter kind.
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Parameters following the context parameter are copied verbatim.
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  // Second pass: map each emitted argument back to what it represents.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // VLA size: load the value (uncasting from uintptr if needed) and
      // record it against the dimension's size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      // By-reference capture: dereference to reach the variable's storage.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
636 
/// Emit the outlined function for the body of the captured statement \p S of
/// an OpenMP directive.
///
/// When debug info with non-reduced detail is requested, two functions are
/// emitted: an inner "_debug__"-suffixed function whose arguments keep their
/// natural types (no uintptr casts, better for debugging), and a thin wrapper
/// carrying the original helper name that takes the casted arguments, reloads
/// the real values and forwards them to the inner function. Otherwise the
/// single outlined function is returned directly.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  // The debug variant gets a distinct suffix; the wrapper emitted below keeps
  // the original helper name.
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Remap captured variables to the local addresses computed by the prologue
  // so references inside the body resolve to the outlined function's copies.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  // Register the passed-in VLA dimension sizes so VLA types in the body can
  // be lowered without recomputing them.
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: casted args only, original helper name, forwarding
  // every argument value to the "_debug__" function F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  // PI walks F's parameters in lock-step with the wrapper's Args; each call
  // argument is loaded from the wrapper-local address of the same capture.
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      // Complex values are passed as the matching parameter type of F.
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        // VLA dimension sizes are plain values — forward them directly.
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
718 
719 //===----------------------------------------------------------------------===//
720 //                              OpenMP Directive Emission
721 //===----------------------------------------------------------------------===//
/// Emit an element-by-element copy loop over an array.
///
/// \param DestAddr Address of the destination array.
/// \param SrcAddr Address of the source array.
/// \param OriginalType The array type of both operands.
/// \param CopyGen Callback invoked once inside the emitted loop body with the
///        (destination element, source element) addresses; it emits whatever
///        per-element copy/initialization is required.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array (possible for VLAs): skip the body
  // entirely when begin == end.
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers; the back-edge
  // incoming values are added after the loop body is emitted, since CopyGen
  // may create additional blocks.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
786 
EmitOMPCopy(QualType OriginalType,Address DestAddr,Address SrcAddr,const VarDecl * DestVD,const VarDecl * SrcVD,const Expr * Copy)787 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
788                                   Address SrcAddr, const VarDecl *DestVD,
789                                   const VarDecl *SrcVD, const Expr *Copy) {
790   if (OriginalType->isArrayType()) {
791     const auto *BO = dyn_cast<BinaryOperator>(Copy);
792     if (BO && BO->getOpcode() == BO_Assign) {
793       // Perform simple memcpy for simple copying.
794       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
795       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
796       EmitAggregateAssign(Dest, Src, OriginalType);
797     } else {
798       // For arrays with complex element types perform element by element
799       // copying.
800       EmitOMPAggregateAssign(
801           DestAddr, SrcAddr, OriginalType,
802           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
803             // Working with the single array element, so have to remap
804             // destination and source variables to corresponding array
805             // elements.
806             CodeGenFunction::OMPPrivateScope Remap(*this);
807             Remap.addPrivate(DestVD, DestElement);
808             Remap.addPrivate(SrcVD, SrcElement);
809             (void)Remap.Privatize();
810             EmitIgnoredExpr(Copy);
811           });
812     }
813   } else {
814     // Remap pseudo source variable to private copy.
815     CodeGenFunction::OMPPrivateScope Remap(*this);
816     Remap.addPrivate(SrcVD, SrcAddr);
817     Remap.addPrivate(DestVD, DestAddr);
818     (void)Remap.Privatize();
819     // Emit copying of the whole variable.
820     EmitIgnoredExpr(Copy);
821   }
822 }
823 
/// Emit initialization of the private copies for the 'firstprivate' clauses
/// of directive \p D and register them in \p PrivateScope.
///
/// \returns true if at least one emitted firstprivate variable is also
/// listed in a 'lastprivate' clause of the same directive (so the caller
/// must still arrange the lastprivate copy-back).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Map each lastprivate variable (canonical decl) to its modifier, so the
  // firstprivate handling below can detect the overlap and the 'conditional'
  // modifier.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      // Field of the captured record for this variable, if it was captured.
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // Captured by copy (non-reference field) into an outlined region and
      // not lastprivate: the capture itself already provides the private
      // copy, so no separate firstprivate emission is needed.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            // Non-trivial element construction: copy element-by-element,
            // remapping VDInit to the current source element so the stored
            // initializer expression reads from it.
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            // Redirect the private decl to the runtime-managed conditional
            // variable.
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
967 
EmitOMPPrivateClause(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)968 void CodeGenFunction::EmitOMPPrivateClause(
969     const OMPExecutableDirective &D,
970     CodeGenFunction::OMPPrivateScope &PrivateScope) {
971   if (!HaveInsertPoint())
972     return;
973   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
974   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
975     auto IRef = C->varlist_begin();
976     for (const Expr *IInit : C->private_copies()) {
977       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
978       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
979         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
980         EmitDecl(*VD);
981         // Emit private VarDecl with copy init.
982         bool IsRegistered =
983             PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
984         assert(IsRegistered && "private var already registered as private");
985         // Silence the warning about unused variable.
986         (void)IsRegistered;
987       }
988       ++IRef;
989     }
990   }
991 }
992 
/// Emit copying of the master thread's threadprivate variables into the
/// current thread's copies for all 'copyin' clauses of \p D, guarded so that
/// the master thread (whose private address equals the master address) skips
/// the copies.
///
/// \returns true if any copy region was emitted, i.e. the caller must follow
/// up with the synchronizing barrier sketched below.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the mapping so later references emit the thread's own copy.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        // The master-thread check is emitted only once, when the first
        // variable is copied; all subsequent copies share the guard region.
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1064 
/// Emit initialization for the 'lastprivate' clauses of \p D: capture the
/// addresses of the original (destination) variables and, where needed, emit
/// the private copies, registering both in \p PrivateScope.
///
/// \returns true if the directive has at least one 'lastprivate' clause (the
/// caller then emits the final copy-back via EmitOMPLastprivateClauseFinal).
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Collect the simd loop control variables; their privatization is handled
  // by the loop emission itself, so they are skipped below.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization, it is done in
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Map the pseudo destination variable to the original's address for
        // the final copy-back.
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            // Conditional lastprivates use a runtime-managed variable.
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1127 
/// Emit the final copy-back of 'lastprivate' values into the original
/// variables for directive \p D.
///
/// \param NoFinals If true, loop counters get no final-value update before
///        the copy-back (they are marked as already emitted instead).
/// \param IsLastIterCond When non-null, the copies are guarded by this
///        last-iteration condition; when null they are emitted
///        unconditionally.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // Map each loop counter to its 'final' update expression so the counter's
  // value can be brought to its post-loop state before copy-back.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For reference-typed privates, load through the reference to reach
        // the referenced storage.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1218 
EmitOMPReductionClauseInit(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope,bool ForInscan)1219 void CodeGenFunction::EmitOMPReductionClauseInit(
1220     const OMPExecutableDirective &D,
1221     CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1222   if (!HaveInsertPoint())
1223     return;
1224   SmallVector<const Expr *, 4> Shareds;
1225   SmallVector<const Expr *, 4> Privates;
1226   SmallVector<const Expr *, 4> ReductionOps;
1227   SmallVector<const Expr *, 4> LHSs;
1228   SmallVector<const Expr *, 4> RHSs;
1229   OMPTaskDataTy Data;
1230   SmallVector<const Expr *, 4> TaskLHSs;
1231   SmallVector<const Expr *, 4> TaskRHSs;
1232   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1233     if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1234       continue;
1235     Shareds.append(C->varlist_begin(), C->varlist_end());
1236     Privates.append(C->privates().begin(), C->privates().end());
1237     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1238     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1239     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1240     if (C->getModifier() == OMPC_REDUCTION_task) {
1241       Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1242       Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1243       Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1244       Data.ReductionOps.append(C->reduction_ops().begin(),
1245                                C->reduction_ops().end());
1246       TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1247       TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1248     }
1249   }
1250   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1251   unsigned Count = 0;
1252   auto *ILHS = LHSs.begin();
1253   auto *IRHS = RHSs.begin();
1254   auto *IPriv = Privates.begin();
1255   for (const Expr *IRef : Shareds) {
1256     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1257     // Emit private VarDecl with reduction init.
1258     RedCG.emitSharedOrigLValue(*this, Count);
1259     RedCG.emitAggregateType(*this, Count);
1260     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1261     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1262                              RedCG.getSharedLValue(Count).getAddress(),
1263                              [&Emission](CodeGenFunction &CGF) {
1264                                CGF.EmitAutoVarInit(Emission);
1265                                return true;
1266                              });
1267     EmitAutoVarCleanups(Emission);
1268     Address BaseAddr = RedCG.adjustPrivateAddress(
1269         *this, Count, Emission.getAllocatedAddress());
1270     bool IsRegistered =
1271         PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1272     assert(IsRegistered && "private var already registered as private");
1273     // Silence the warning about unused variable.
1274     (void)IsRegistered;
1275 
1276     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1277     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1278     QualType Type = PrivateVD->getType();
1279     bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
1280     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1281       // Store the address of the original variable associated with the LHS
1282       // implicit variable.
1283       PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
1284       PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1285     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1286                isa<ArraySubscriptExpr>(IRef)) {
1287       // Store the address of the original variable associated with the LHS
1288       // implicit variable.
1289       PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
1290       PrivateScope.addPrivate(RHSVD,
1291                               GetAddrOfLocalVar(PrivateVD).withElementType(
1292                                   ConvertTypeForMem(RHSVD->getType())));
1293     } else {
1294       QualType Type = PrivateVD->getType();
1295       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1296       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1297       // Store the address of the original variable associated with the LHS
1298       // implicit variable.
1299       if (IsArray) {
1300         OriginalAddr =
1301             OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
1302       }
1303       PrivateScope.addPrivate(LHSVD, OriginalAddr);
1304       PrivateScope.addPrivate(
1305           RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
1306                                ConvertTypeForMem(RHSVD->getType()))
1307                          : GetAddrOfLocalVar(PrivateVD));
1308     }
1309     ++ILHS;
1310     ++IRHS;
1311     ++IPriv;
1312     ++Count;
1313   }
1314   if (!Data.ReductionVars.empty()) {
1315     Data.IsReductionWithTaskMod = true;
1316     Data.IsWorksharingReduction =
1317         isOpenMPWorksharingDirective(D.getDirectiveKind());
1318     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1319         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1320     const Expr *TaskRedRef = nullptr;
1321     switch (D.getDirectiveKind()) {
1322     case OMPD_parallel:
1323       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1324       break;
1325     case OMPD_for:
1326       TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1327       break;
1328     case OMPD_sections:
1329       TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1330       break;
1331     case OMPD_parallel_for:
1332       TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1333       break;
1334     case OMPD_parallel_master:
1335       TaskRedRef =
1336           cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1337       break;
1338     case OMPD_parallel_sections:
1339       TaskRedRef =
1340           cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1341       break;
1342     case OMPD_target_parallel:
1343       TaskRedRef =
1344           cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1345       break;
1346     case OMPD_target_parallel_for:
1347       TaskRedRef =
1348           cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1349       break;
1350     case OMPD_distribute_parallel_for:
1351       TaskRedRef =
1352           cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1353       break;
1354     case OMPD_teams_distribute_parallel_for:
1355       TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1356                        .getTaskReductionRefExpr();
1357       break;
1358     case OMPD_target_teams_distribute_parallel_for:
1359       TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1360                        .getTaskReductionRefExpr();
1361       break;
1362     case OMPD_simd:
1363     case OMPD_for_simd:
1364     case OMPD_section:
1365     case OMPD_single:
1366     case OMPD_master:
1367     case OMPD_critical:
1368     case OMPD_parallel_for_simd:
1369     case OMPD_task:
1370     case OMPD_taskyield:
1371     case OMPD_error:
1372     case OMPD_barrier:
1373     case OMPD_taskwait:
1374     case OMPD_taskgroup:
1375     case OMPD_flush:
1376     case OMPD_depobj:
1377     case OMPD_scan:
1378     case OMPD_ordered:
1379     case OMPD_atomic:
1380     case OMPD_teams:
1381     case OMPD_target:
1382     case OMPD_cancellation_point:
1383     case OMPD_cancel:
1384     case OMPD_target_data:
1385     case OMPD_target_enter_data:
1386     case OMPD_target_exit_data:
1387     case OMPD_taskloop:
1388     case OMPD_taskloop_simd:
1389     case OMPD_master_taskloop:
1390     case OMPD_master_taskloop_simd:
1391     case OMPD_parallel_master_taskloop:
1392     case OMPD_parallel_master_taskloop_simd:
1393     case OMPD_distribute:
1394     case OMPD_target_update:
1395     case OMPD_distribute_parallel_for_simd:
1396     case OMPD_distribute_simd:
1397     case OMPD_target_parallel_for_simd:
1398     case OMPD_target_simd:
1399     case OMPD_teams_distribute:
1400     case OMPD_teams_distribute_simd:
1401     case OMPD_teams_distribute_parallel_for_simd:
1402     case OMPD_target_teams:
1403     case OMPD_target_teams_distribute:
1404     case OMPD_target_teams_distribute_parallel_for_simd:
1405     case OMPD_target_teams_distribute_simd:
1406     case OMPD_declare_target:
1407     case OMPD_end_declare_target:
1408     case OMPD_threadprivate:
1409     case OMPD_allocate:
1410     case OMPD_declare_reduction:
1411     case OMPD_declare_mapper:
1412     case OMPD_declare_simd:
1413     case OMPD_requires:
1414     case OMPD_declare_variant:
1415     case OMPD_begin_declare_variant:
1416     case OMPD_end_declare_variant:
1417     case OMPD_unknown:
1418     default:
1419       llvm_unreachable("Unexpected directive with task reductions.");
1420     }
1421 
1422     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1423     EmitVarDecl(*VD);
1424     EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1425                       /*Volatile=*/false, TaskRedRef->getType());
1426   }
1427 }
1428 
EmitOMPReductionClauseFinal(const OMPExecutableDirective & D,const OpenMPDirectiveKind ReductionKind)1429 void CodeGenFunction::EmitOMPReductionClauseFinal(
1430     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1431   if (!HaveInsertPoint())
1432     return;
1433   llvm::SmallVector<const Expr *, 8> Privates;
1434   llvm::SmallVector<const Expr *, 8> LHSExprs;
1435   llvm::SmallVector<const Expr *, 8> RHSExprs;
1436   llvm::SmallVector<const Expr *, 8> ReductionOps;
1437   bool HasAtLeastOneReduction = false;
1438   bool IsReductionWithTaskMod = false;
1439   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1440     // Do not emit for inscan reductions.
1441     if (C->getModifier() == OMPC_REDUCTION_inscan)
1442       continue;
1443     HasAtLeastOneReduction = true;
1444     Privates.append(C->privates().begin(), C->privates().end());
1445     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1446     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1447     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1448     IsReductionWithTaskMod =
1449         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1450   }
1451   if (HasAtLeastOneReduction) {
1452     if (IsReductionWithTaskMod) {
1453       CGM.getOpenMPRuntime().emitTaskReductionFini(
1454           *this, D.getBeginLoc(),
1455           isOpenMPWorksharingDirective(D.getDirectiveKind()));
1456     }
1457     bool TeamsLoopCanBeParallel = false;
1458     if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
1459       TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1460     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1461                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1462                       TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1463     bool SimpleReduction = ReductionKind == OMPD_simd;
1464     // Emit nowait reduction if nowait clause is present or directive is a
1465     // parallel directive (it always has implicit barrier).
1466     CGM.getOpenMPRuntime().emitReduction(
1467         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1468         {WithNowait, SimpleReduction, ReductionKind});
1469   }
1470 }
1471 
emitPostUpdateForReductionClause(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)1472 static void emitPostUpdateForReductionClause(
1473     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1474     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1475   if (!CGF.HaveInsertPoint())
1476     return;
1477   llvm::BasicBlock *DoneBB = nullptr;
1478   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1479     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1480       if (!DoneBB) {
1481         if (llvm::Value *Cond = CondGen(CGF)) {
1482           // If the first post-update expression is found, emit conditional
1483           // block if it was requested.
1484           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1485           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1486           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1487           CGF.EmitBlock(ThenBB);
1488         }
1489       }
1490       CGF.EmitIgnoredExpr(PostUpdate);
1491     }
1492   }
1493   if (DoneBB)
1494     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1495 }
1496 
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'. The callback may push additional values onto the
/// captured-variables vector before the parallel outlined function is called
/// (see emitCommonOMPParallelDirective).
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1506 
1507 static void
checkForLastprivateConditionalUpdate(CodeGenFunction & CGF,const OMPExecutableDirective & S)1508 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1509                                      const OMPExecutableDirective &S) {
1510   if (CGF.getLangOpts().OpenMP < 50)
1511     return;
1512   llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1513   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1514     for (const Expr *Ref : C->varlists()) {
1515       if (!Ref->getType()->isScalarType())
1516         continue;
1517       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1518       if (!DRE)
1519         continue;
1520       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1521       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1522     }
1523   }
1524   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1525     for (const Expr *Ref : C->varlists()) {
1526       if (!Ref->getType()->isScalarType())
1527         continue;
1528       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1529       if (!DRE)
1530         continue;
1531       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1532       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1533     }
1534   }
1535   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1536     for (const Expr *Ref : C->varlists()) {
1537       if (!Ref->getType()->isScalarType())
1538         continue;
1539       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1540       if (!DRE)
1541         continue;
1542       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1543       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1544     }
1545   }
1546   // Privates should ne analyzed since they are not captured at all.
1547   // Task reductions may be skipped - tasks are ignored.
1548   // Firstprivates do not return value but may be passed by reference - no need
1549   // to check for updated lastprivate conditional.
1550   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1551     for (const Expr *Ref : C->varlists()) {
1552       if (!Ref->getType()->isScalarType())
1553         continue;
1554       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1555       if (!DRE)
1556         continue;
1557       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1558     }
1559   }
1560   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1561       CGF, S, PrivateDecls);
1562 }
1563 
/// Common codegen for all directives containing a 'parallel' region: outlines
/// the region body, applies num_threads/proc_bind/if clauses, and emits the
/// runtime call that forks the parallel region.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  // Outline the parallel region body into a separate function.
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  // Evaluate and register the 'num_threads' clause, if present.
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  // Register the 'proc_bind' clause, if present.
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  // Find the 'if' clause that applies to the parallel region: either one with
  // no name modifier or one explicitly naming 'parallel'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}
1606 
isAllocatableDecl(const VarDecl * VD)1607 static bool isAllocatableDecl(const VarDecl *VD) {
1608   const VarDecl *CVD = VD->getCanonicalDecl();
1609   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1610     return false;
1611   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1612   // Use the default allocation.
1613   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1614             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1615            !AA->getAllocator());
1616 }
1617 
/// No-op CodeGenBoundParametersTy callback for non-combined parallel
/// constructs, which have no distribute bounds to append to the captured
/// variables.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1621 
emitOMPCopyinClause(CodeGenFunction & CGF,const OMPExecutableDirective & S)1622 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1623                                 const OMPExecutableDirective &S) {
1624   bool Copyins = CGF.EmitOMPCopyinClause(S);
1625   if (Copyins) {
1626     // Emit implicit barrier to synchronize threads and avoid data races on
1627     // propagation master's thread values of threadprivate variables to local
1628     // instances of that variables of all other implicit threads.
1629     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1630         CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1631         /*ForceSimpleCall=*/true);
1632   }
1633 }
1634 
/// Emits an OpenMP runtime allocation for a local variable declared with the
/// 'omp allocate' attribute and a non-default allocator. Returns an invalid
/// Address when the variable should instead use the ordinary (stack)
/// allocation.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified (VLA-like) types: the size is computed at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size types: round the size up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  // Pair the allocation with a matching free call, registered as a cleanup so
  // the memory is released on both normal and EH scope exits.
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  // Cast the raw allocation to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}
1684 
/// Returns the address of the threadprivate copy of \p VD. With native TLS
/// support the original address is already thread-local and returned
/// unchanged; otherwise a cached threadprivate lookup through the OpenMP
/// runtime is emitted.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  // The runtime works on an i8* view of the variable plus its store size.
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  // Cache variable name: "<mangled-name>.cache."
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}
1707 
getNameWithSeparators(ArrayRef<StringRef> Parts,StringRef FirstSeparator,StringRef Separator)1708 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1709     ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1710   SmallString<128> Buffer;
1711   llvm::raw_svector_ostream OS(Buffer);
1712   StringRef Sep = FirstSeparator;
1713   for (StringRef Part : Parts) {
1714     OS << Sep << Part;
1715     Sep = Separator;
1716   }
1717   return OS.str().str();
1718 }
1719 
EmitOMPInlinedRegionBody(CodeGenFunction & CGF,const Stmt * RegionBodyStmt,InsertPointTy AllocaIP,InsertPointTy CodeGenIP,Twine RegionName)1720 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1721     CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1722     InsertPointTy CodeGenIP, Twine RegionName) {
1723   CGBuilderTy &Builder = CGF.Builder;
1724   Builder.restoreIP(CodeGenIP);
1725   llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1726                                                "." + RegionName + ".after");
1727 
1728   {
1729     OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1730     CGF.EmitStmt(RegionBodyStmt);
1731   }
1732 
1733   if (Builder.saveIP().isSet())
1734     Builder.CreateBr(FiniBB);
1735 }
1736 
EmitOMPOutlinedRegionBody(CodeGenFunction & CGF,const Stmt * RegionBodyStmt,InsertPointTy AllocaIP,InsertPointTy CodeGenIP,Twine RegionName)1737 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1738     CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1739     InsertPointTy CodeGenIP, Twine RegionName) {
1740   CGBuilderTy &Builder = CGF.Builder;
1741   Builder.restoreIP(CodeGenIP);
1742   llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1743                                                "." + RegionName + ".after");
1744 
1745   {
1746     OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1747     CGF.EmitStmt(RegionBodyStmt);
1748   }
1749 
1750   if (Builder.saveIP().isSet())
1751     Builder.CreateBr(FiniBB);
1752 }
1753 
/// Emits code for '#pragma omp parallel', either through the OpenMPIRBuilder
/// (when enabled) or through the classic CGOpenMPRuntime path.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // OpenMPIRBuilder path: build the parallel region with callbacks.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    // Body callback: emit the captured statement as an outlined region.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    // Handle the data-sharing clauses before emitting the region body.
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
1835 
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  // The directive the metadirective resolved to is stored as its if-stmt;
  // simply emit that statement.
  EmitStmt(S.getIfStmt());
}
1839 
namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

  OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
      delete;
  OMPTransformDirectiveScopeRAII &
  operator=(const OMPTransformDirectiveScopeRAII &) = delete;

public:
  /// Opens a loop scope and captured-statement info when \p S is a loop-based
  /// directive; for any other statement this RAII is a no-op.
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    // Scope is only set when the constructor allocated all three members.
    if (!Scope)
      return;
    // Tear down in reverse order of construction: the CGCapturedStmtRAII
    // references CGSI, which must therefore outlive it.
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace
1869 
/// Emits the body \p S of a collapsed loop nest. Compound statements are
/// traversed transparently; when the expected next inner loop (\p NextLoop)
/// is reached, its body becomes the statement to emit and the search
/// continues one level deeper, up to \p MaxLevel collapsed levels.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // Unwrap loop-transformation directives and canonical-loop wrappers to
    // reach the underlying for / range-for statement.
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      // The loop header is emitted elsewhere; the range loop's variable
      // declaration still has to be emitted here, before its body.
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      // Search for the next (possibly imperfectly nested) inner loop.
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}
1908 
/// Emits the body of an OpenMP loop directive: counter/linear updates,
/// non-rectangular bounds checks, inscan reduction block wiring, and the loop
/// body itself.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space; if not, skip the body via the continue block.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  // Set up the privatization scope for inscan reductions, if any.
  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1976 
1977 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1978 
1979 /// Emit a captured statement and return the function as well as its captured
1980 /// closure context.
emitCapturedStmtFunc(CodeGenFunction & ParentCGF,const CapturedStmt * S)1981 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1982                                              const CapturedStmt *S) {
1983   LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1984   CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1985   std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1986       std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1987   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1988   llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1989 
1990   return {F, CapStruct.getPointer(ParentCGF)};
1991 }
1992 
1993 /// Emit a call to a previously captured closure.
1994 static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction & ParentCGF,EmittedClosureTy Cap,llvm::ArrayRef<llvm::Value * > Args)1995 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1996                      llvm::ArrayRef<llvm::Value *> Args) {
1997   // Append the closure context to the argument.
1998   SmallVector<llvm::Value *> EffectiveArgs;
1999   EffectiveArgs.reserve(Args.size() + 1);
2000   llvm::append_range(EffectiveArgs, Args);
2001   EffectiveArgs.push_back(Cap.second);
2002 
2003   return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
2004 }
2005 
/// Emit the loop nest rooted at \p S via the OpenMPIRBuilder and return the
/// CanonicalLoopInfo of the outermost of the \p Depth collapsed loops.
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  // Emitting the statement pushes the CanonicalLoopInfos of the contained
  // canonical loops onto OMPLoopNestStack.
  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}
2032 
/// Emit an OMPCanonicalLoop node. With the OpenMPIRBuilder enabled, the loop
/// is emitted via OpenMPIRBuilder::createCanonicalLoop using the node's
/// distance/loop-variable helper functions; otherwise the syntactical loop
/// statement is emitted as-is.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    // For a range-based for, emit the range/begin/end/loop-var declarations
    // the captured helpers may refer to.
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  // The distance function writes the trip count through its first parameter.
  RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress();
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.emitRawPointer(*this), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}
2107 
/// Emit the innermost cond/body/inc loop skeleton for an OpenMP directive:
/// a condition block branching to the body or the exit, the body produced by
/// \p BodyGen, then the increment \p IncExpr and a back-edge to the condition.
/// Break/continue destinations are installed around the body.
void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the staged cleanup block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
2168 
/// Emit initializers for the private copies of 'linear' clause variables and
/// pre-calculate non-constant linear steps.
/// \returns true if at least one linear-clause init was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers directly to another variable: allocate the copy and
        // initialize it from a DeclRefExpr rebuilt against the original decl
        // (marking it captured when CapturedStmtInfo knows about it).
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
2206 
/// Emit the final values of 'linear' clause variables after the loop. When
/// \p CondGen produces a condition, the updates are emitted under it.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Temporarily map the original variable to its own address so the final
      // expression writes the original storage.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2244 
emitAlignedClause(CodeGenFunction & CGF,const OMPExecutableDirective & D)2245 static void emitAlignedClause(CodeGenFunction &CGF,
2246                               const OMPExecutableDirective &D) {
2247   if (!CGF.HaveInsertPoint())
2248     return;
2249   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2250     llvm::APInt ClauseAlignment(64, 0);
2251     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2252       auto *AlignmentCI =
2253           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2254       ClauseAlignment = AlignmentCI->getValue();
2255     }
2256     for (const Expr *E : Clause->varlists()) {
2257       llvm::APInt Alignment(ClauseAlignment);
2258       if (Alignment == 0) {
2259         // OpenMP [2.8.1, Description]
2260         // If no optional parameter is specified, implementation-defined default
2261         // alignments for SIMD instructions on the target platforms are assumed.
2262         Alignment =
2263             CGF.getContext()
2264                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2265                     E->getType()->getPointeeType()))
2266                 .getQuantity();
2267       }
2268       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2269              "alignment is not power of 2");
2270       if (Alignment != 0) {
2271         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2272         CGF.emitAlignmentAssumption(
2273             PtrValue, E, /*No second loc needed*/ SourceLocation(),
2274             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2275       }
2276     }
2277   }
2278 }
2279 
/// Create private copies of the loop counters of \p S and register them in
/// \p LoopScope; also privatizes the extra loop counters introduced by
/// ordered(n) clauses.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    // Drop the direct mapping of the private counter so it can be re-mapped
    // through the private scope below.
    LocalDeclMap.erase(PrivateVD);
    // The original counter resolves to the freshly-allocated private storage.
    (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original variable is addressable here: make the private counter
      // alias the original's storage.
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress());
    } else {
      (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            VD, CreateMemTemp(DRE->getType(), VD->getName()));
      }
    }
  }
}
2321 
/// Emit the loop precondition check for \p S, branching to \p TrueBlock when
/// \p Cond holds and \p FalseBlock otherwise. Loop counters are privatized
/// first so that evaluating the init expressions writes private copies, not
/// the original variables.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
2359 
/// Emit private copies of the 'linear' clause variables and register them in
/// \p PrivateScope. Linear vars that are also simd loop counters only get
/// their private declaration emitted, without being registered in the scope —
/// presumably because the counters are privatized via
/// EmitOMPPrivateLoopCounters (confirm with callers).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    // Collect the canonical decls of the simd loop counters.
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        // Emit private VarDecl with copy init.
        EmitVarDecl(*PrivateVD);
        bool IsRegistered =
            PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
2393 
emitSimdlenSafelenClause(CodeGenFunction & CGF,const OMPExecutableDirective & D)2394 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2395                                      const OMPExecutableDirective &D) {
2396   if (!CGF.HaveInsertPoint())
2397     return;
2398   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2399     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2400                                  /*ignoreResult=*/true);
2401     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2402     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2403     // In presence of finite 'safelen', it may be unsafe to mark all
2404     // the memory instructions parallel, because loop-carried
2405     // dependences of 'safelen' iterations are possible.
2406     CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2407   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2408     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2409                                  /*ignoreResult=*/true);
2410     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2411     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2412     // In presence of finite 'safelen', it may be unsafe to mark all
2413     // the memory instructions parallel, because loop-carried
2414     // dependences of 'safelen' iterations are possible.
2415     CGF.LoopStack.setParallel(/*Enable=*/false);
2416   }
2417 }
2418 
EmitOMPSimdInit(const OMPLoopDirective & D)2419 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2420   // Walk clauses and process safelen/lastprivate.
2421   LoopStack.setParallel(/*Enable=*/true);
2422   LoopStack.setVectorizeEnable();
2423   emitSimdlenSafelenClause(*this, D);
2424   if (const auto *C = D.getSingleClause<OMPOrderClause>())
2425     if (C->getKind() == OMPC_ORDER_concurrent)
2426       LoopStack.setParallel(/*Enable=*/true);
2427   if ((D.getDirectiveKind() == OMPD_simd ||
2428        (getLangOpts().OpenMPSimd &&
2429         isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2430       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2431                    [](const OMPReductionClause *C) {
2432                      return C->getModifier() == OMPC_REDUCTION_inscan;
2433                    }))
2434     // Disable parallel access in case of prefix sum.
2435     LoopStack.setParallel(/*Enable=*/false);
2436 }
2437 
/// Emit the final values of the loop counters of \p D after a simd loop,
/// optionally guarded by the condition produced by \p CondGen. Only counters
/// that are addressable here (local, captured, global, or captured-expr
/// decls) are updated.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        // A captured expression decl is addressed through its init expression.
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Map the original counter to that address while emitting the final
      // expression.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2482 
/// Emit the body of the loop directive \p S followed by a debug stop point
/// located at the directive itself.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
2489 
2490 /// Emit a helper variable and return corresponding lvalue.
EmitOMPHelperVar(CodeGenFunction & CGF,const DeclRefExpr * Helper)2491 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2492                                const DeclRefExpr *Helper) {
2493   auto VDecl = cast<VarDecl>(Helper->getDecl());
2494   CGF.EmitVarDecl(*VDecl);
2495   return CGF.EmitLValue(Helper);
2496 }
2497 
/// Emit the simd loop body of \p S, honoring an applicable 'if' clause: the
/// then-branch runs \p SimdInitGen (vectorization enabled) before the body,
/// the else-branch emits the body with vectorization explicitly disabled.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  // An OpenMP >= 5.0 'if' clause with no name modifier or the 'simd' modifier
  // selects between the two variants.
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No applicable 'if' clause: emit the vectorized variant unconditionally.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2533 
/// Common codegen for simd-family directives: evaluates the precondition,
/// emits the iteration variable and iteration count, privatizes clause
/// variables, and emits the vectorizable inner loop followed by the
/// final/lastprivate/reduction updates.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    // Emit the helper vars backing the lower/upper bound variables of
    // worksharing-like combined directives.
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatize all clause variables for the duration of the loop.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  }
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
2629 
isSupportedByOpenMPIRBuilder(const OMPSimdDirective & S)2630 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2631   // Check for unsupported clauses
2632   for (OMPClause *C : S.clauses()) {
2633     // Currently only order, simdlen and safelen clauses are supported
2634     if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2635           isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2636       return false;
2637   }
2638 
2639   // Check if we have a statement with the ordered directive.
2640   // Visit the statement hierarchy to find a compound statement
2641   // with a ordered directive in it.
2642   if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2643     if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2644       for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2645         if (!SubStmt)
2646           continue;
2647         if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2648           for (const Stmt *CSSubStmt : CS->children()) {
2649             if (!CSSubStmt)
2650               continue;
2651             if (isa<OMPOrderedDirective>(CSSubStmt)) {
2652               return false;
2653             }
2654           }
2655         }
2656       }
2657     }
2658   }
2659   return true;
2660 }
2661 static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPSimdDirective & S,CodeGenFunction & CGF)2662 GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2663   llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2664   for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2665     llvm::APInt ClauseAlignment(64, 0);
2666     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2667       auto *AlignmentCI =
2668           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2669       ClauseAlignment = AlignmentCI->getValue();
2670     }
2671     for (const Expr *E : Clause->varlists()) {
2672       llvm::APInt Alignment(ClauseAlignment);
2673       if (Alignment == 0) {
2674         // OpenMP [2.8.1, Description]
2675         // If no optional parameter is specified, implementation-defined default
2676         // alignments for SIMD instructions on the target platforms are assumed.
2677         Alignment =
2678             CGF.getContext()
2679                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2680                     E->getType()->getPointeeType()))
2681                 .getQuantity();
2682       }
2683       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2684              "alignment is not power of 2");
2685       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2686       AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2687     }
2688   }
2689   return AlignedVars;
2690 }
2691 
/// Emit an '#pragma omp simd' directive.
///
/// Two code paths exist: when the OpenMPIRBuilder is enabled and supports
/// every clause on the directive, the collapsed canonical loop nest is
/// emitted and SIMD metadata (aligned/order/simdlen/safelen) is attached via
/// OMPBuilder.applySimd(). Otherwise the classic clang codegen path
/// (emitOMPSimdRegion) is emitted as an inlined directive.
EmitOMPSimdDirective(const OMPSimdDirective & S)2692 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2693   bool UseOMPIRBuilder =
2694       CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2695   if (UseOMPIRBuilder) {
2696     auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2697                                                           PrePostActionTy &) {
2698       // Use the OpenMPIRBuilder if enabled.
2699       if (UseOMPIRBuilder) {
2700         llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2701             GetAlignedMapping(S, CGF);
2702         // Emit the associated statement and get its loop representation.
2703         const Stmt *Inner = S.getRawStmt();
2704         llvm::CanonicalLoopInfo *CLI =
2705             EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2706 
2707         llvm::OpenMPIRBuilder &OMPBuilder =
2708             CGM.getOpenMPRuntime().getOMPBuilder();
2709         // Add SIMD specific metadata
        // The casts below expect the simdlen/safelen clause expressions to
        // have been folded to integer constants.
2710         llvm::ConstantInt *Simdlen = nullptr;
2711         if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2712           RValue Len =
2713               this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2714                                 /*ignoreResult=*/true);
2715           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2716           Simdlen = Val;
2717         }
2718         llvm::ConstantInt *Safelen = nullptr;
2719         if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2720           RValue Len =
2721               this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2722                                 /*ignoreResult=*/true);
2723           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2724           Safelen = Val;
2725         }
2726         llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2727         if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2728           if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
2729             Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2730           }
2731         }
2732         // Add simd metadata to the collapsed loop. Do not generate
2733         // another loop for if clause. Support for if clause is done earlier.
2734         OMPBuilder.applySimd(CLI, AlignedVars,
2735                              /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2736         return;
2737       }
2738     };
2739     {
2740       auto LPCRegion =
2741           CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2742       OMPLexicalScope Scope(*this, S, OMPD_unknown);
2743       CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2744                                                   CodeGenIRBuilder);
2745     }
2746     return;
2747   }
2748 
   // Classic (non-IR-builder) codegen path.
2749   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2750   OMPFirstScanLoop = true;
2751   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2752     emitOMPSimdRegion(CGF, S, Action);
2753   };
2754   {
2755     auto LPCRegion =
2756         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2757     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2758     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2759   }
2760   // Check for outer lastprivate conditional update.
2761   checkForLastprivateConditionalUpdate(*this, S);
2762 }
2763 
EmitOMPTileDirective(const OMPTileDirective & S)2764 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2765   // Emit the de-sugared statement.
2766   OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2767   EmitStmt(S.getTransformedStmt());
2768 }
2769 
EmitOMPReverseDirective(const OMPReverseDirective & S)2770 void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
2771   // Emit the de-sugared statement.
2772   OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
2773   EmitStmt(S.getTransformedStmt());
2774 }
2775 
EmitOMPInterchangeDirective(const OMPInterchangeDirective & S)2776 void CodeGenFunction::EmitOMPInterchangeDirective(
2777     const OMPInterchangeDirective &S) {
2778   // Emit the de-sugared statement.
2779   OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
2780   EmitStmt(S.getTransformedStmt());
2781 }
2782 
/// Emit an '#pragma omp unroll' directive.
///
/// With the OpenMPIRBuilder, the associated loop is consumed and unrolled
/// directly (full / partial-with-factor / heuristic). Without it, unroll
/// metadata is set on LoopStack for the next emitted loop and the associated
/// statement is emitted normally.
EmitOMPUnrollDirective(const OMPUnrollDirective & S)2783 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2784   bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2785 
2786   if (UseOMPIRBuilder) {
2787     auto DL = SourceLocToDebugLoc(S.getBeginLoc());
2788     const Stmt *Inner = S.getRawStmt();
2789 
2790     // Consume nested loop. Clear the entire remaining loop stack because a
2791     // fully unrolled loop is non-transformable. For partial unrolling the
2792     // generated outer loop is pushed back to the stack.
2793     llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2794     OMPLoopNestStack.clear();
2795 
2796     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2797 
    // An enclosing loop-associated construct expects a loop on the stack, so
    // partial unrolling must hand back the generated outer loop.
2798     bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2799     llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2800 
2801     if (S.hasClausesOfKind<OMPFullClause>()) {
2802       assert(ExpectedOMPLoopDepth == 0);
2803       OMPBuilder.unrollLoopFull(DL, CLI);
2804     } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // Factor 0 means no explicit factor was given; the builder chooses one.
2805       uint64_t Factor = 0;
2806       if (Expr *FactorExpr = PartialClause->getFactor()) {
2807         Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2808         assert(Factor >= 1 && "Only positive factors are valid");
2809       }
2810       OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
2811                                    NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2812     } else {
2813       OMPBuilder.unrollLoopHeuristic(DL, CLI);
2814     }
2815 
2816     assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2817            "NeedsUnrolledCLI implies UnrolledCLI to be set");
2818     if (UnrolledCLI)
2819       OMPLoopNestStack.push_back(UnrolledCLI);
2820 
2821     return;
2822   }
2823 
2824   // This function is only called if the unrolled loop is not consumed by any
2825   // other loop-associated construct. Such a loop-associated construct will have
2826   // used the transformed AST.
2827 
2828   // Set the unroll metadata for the next emitted loop.
2829   LoopStack.setUnrollState(LoopAttributes::Enable);
2830 
2831   if (S.hasClausesOfKind<OMPFullClause>()) {
2832     LoopStack.setUnrollState(LoopAttributes::Full);
2833   } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2834     if (Expr *FactorExpr = PartialClause->getFactor()) {
2835       uint64_t Factor =
2836           FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2837       assert(Factor >= 1 && "Only positive factors are valid");
2838       LoopStack.setUnrollCount(Factor);
2839     }
2840   }
2841 
2842   EmitStmt(S.getAssociatedStmt());
2843 }
2844 
/// Emit the outer "dispatch" loop shared by worksharing and distribute
/// codegen.
///
/// Emitted control-flow skeleton:
///   omp.dispatch.cond: is another chunk available? (static schedules:
///       recompute EUB/IV and test the condition; dynamic/ordered:
///       call the runtime's "for next" entry point)
///   omp.dispatch.body: run the inner loop over the chunk, invoking
///       CodeGenLoop per iteration and CodeGenOrdered for ordered loops
///   omp.dispatch.inc:  static schedules only: LB/UB += stride
///   omp.dispatch.end:  notify the runtime via the cancel-aware exit
EmitOMPOuterLoop(bool DynamicOrOrdered,bool IsMonotonic,const OMPLoopDirective & S,CodeGenFunction::OMPPrivateScope & LoopScope,const CodeGenFunction::OMPLoopArguments & LoopArgs,const CodeGenFunction::CodeGenLoopTy & CodeGenLoop,const CodeGenFunction::CodeGenOrderedTy & CodeGenOrdered)2845 void CodeGenFunction::EmitOMPOuterLoop(
2846     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2847     CodeGenFunction::OMPPrivateScope &LoopScope,
2848     const CodeGenFunction::OMPLoopArguments &LoopArgs,
2849     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2850     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2851   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2852 
2853   const Expr *IVExpr = S.getIterationVariable();
2854   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2855   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2856 
2857   JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2858 
2859   // Start the loop with a block that tests the condition.
2860   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2861   EmitBlock(CondBlock);
2862   const SourceRange R = S.getSourceRange();
2863   OMPLoopNestStack.clear();
2864   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2865                  SourceLocToDebugLoc(R.getEnd()));
2866 
2867   llvm::Value *BoolCondVal = nullptr;
2868   if (!DynamicOrOrdered) {
2869     // UB = min(UB, GlobalUB) or
2870     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2871     // 'distribute parallel for')
2872     EmitIgnoredExpr(LoopArgs.EUB);
2873     // IV = LB
2874     EmitIgnoredExpr(LoopArgs.Init);
2875     // IV < UB
2876     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2877   } else {
    // Dynamic/ordered: ask the runtime whether another chunk exists; it also
    // fills in the new LB/UB/ST values.
2878     BoolCondVal =
2879         RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2880                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2881   }
2882 
2883   // If there are any cleanups between here and the loop-exit scope,
2884   // create a block to stage a loop exit along.
2885   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2886   if (LoopScope.requiresCleanups())
2887     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2888 
2889   llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2890   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2891   if (ExitBlock != LoopExit.getBlock()) {
2892     EmitBlock(ExitBlock);
2893     EmitBranchThroughCleanup(LoopExit);
2894   }
2895   EmitBlock(LoopBody);
2896 
2897   // Emit "IV = LB" (in case of static schedule, we have already calculated new
2898   // LB for loop condition and emitted it above).
2899   if (DynamicOrOrdered)
2900     EmitIgnoredExpr(LoopArgs.Init);
2901 
2902   // Create a block for the increment.
2903   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2904   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2905 
2906   emitCommonSimdLoop(
2907       *this, S,
2908       [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2909         // Generate !llvm.loop.parallel metadata for loads and stores for loops
2910         // with dynamic/guided scheduling and without ordered clause.
2911         if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2912           CGF.LoopStack.setParallel(!IsMonotonic);
2913           if (const auto *C = S.getSingleClause<OMPOrderClause>())
2914             if (C->getKind() == OMPC_ORDER_concurrent)
2915               CGF.LoopStack.setParallel(/*Enable=*/true);
2916         } else {
2917           CGF.EmitOMPSimdInit(S);
2918         }
2919       },
2920       [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2921        &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2922         SourceLocation Loc = S.getBeginLoc();
2923         // when 'distribute' is not combined with a 'for':
2924         // while (idx <= UB) { BODY; ++idx; }
2925         // when 'distribute' is combined with a 'for'
2926         // (e.g. 'distribute parallel for')
2927         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2928         CGF.EmitOMPInnerLoop(
2929             S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2930             [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2931               CodeGenLoop(CGF, S, LoopExit);
2932             },
2933             [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2934               CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2935             });
2936       });
2937 
2938   EmitBlock(Continue.getBlock());
2939   BreakContinueStack.pop_back();
2940   if (!DynamicOrOrdered) {
2941     // Emit "LB = LB + Stride", "UB = UB + Stride".
2942     EmitIgnoredExpr(LoopArgs.NextLB);
2943     EmitIgnoredExpr(LoopArgs.NextUB);
2944   }
2945 
2946   EmitBranch(CondBlock);
2947   OMPLoopNestStack.clear();
2948   LoopStack.pop();
2949   // Emit the fall-through block.
2950   EmitBlock(LoopExit.getBlock());
2951 
2952   // Tell the runtime we are done.
2953   auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
2954     if (!DynamicOrOrdered)
2955       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2956                                                      LoopArgs.DKind);
2957   };
2958   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2959 }
2960 
/// Emit the outer loop for a worksharing 'for' whose schedule requires one:
/// every dynamic/guided/auto/runtime schedule, any ordered loop, and chunked
/// static schedules. Initializes the runtime (for_dispatch_init or
/// for_static_init), delegates the loop structure to EmitOMPOuterLoop, and
/// for dispatch-based schedules emits for_dispatch_deinit afterwards.
EmitOMPForOuterLoop(const OpenMPScheduleTy & ScheduleKind,bool IsMonotonic,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,bool Ordered,const OMPLoopArguments & LoopArgs,const CodeGenDispatchBoundsTy & CGDispatchBounds)2961 void CodeGenFunction::EmitOMPForOuterLoop(
2962     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2963     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2964     const OMPLoopArguments &LoopArgs,
2965     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2966   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2967 
2968   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2969   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2970 
2971   assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2972                                             LoopArgs.Chunk != nullptr)) &&
2973          "static non-chunked schedule does not need outer loop");
2974 
2975   // Emit outer loop.
2976   //
2977   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2978   // When schedule(dynamic,chunk_size) is specified, the iterations are
2979   // distributed to threads in the team in chunks as the threads request them.
2980   // Each thread executes a chunk of iterations, then requests another chunk,
2981   // until no chunks remain to be distributed. Each chunk contains chunk_size
2982   // iterations, except for the last chunk to be distributed, which may have
2983   // fewer iterations. When no chunk_size is specified, it defaults to 1.
2984   //
2985   // When schedule(guided,chunk_size) is specified, the iterations are assigned
2986   // to threads in the team in chunks as the executing threads request them.
2987   // Each thread executes a chunk of iterations, then requests another chunk,
2988   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2989   // each chunk is proportional to the number of unassigned iterations divided
2990   // by the number of threads in the team, decreasing to 1. For a chunk_size
2991   // with value k (greater than 1), the size of each chunk is determined in the
2992   // same way, with the restriction that the chunks do not contain fewer than k
2993   // iterations (except for the last chunk to be assigned, which may have fewer
2994   // than k iterations).
2995   //
2996   // When schedule(auto) is specified, the decision regarding scheduling is
2997   // delegated to the compiler and/or runtime system. The programmer gives the
2998   // implementation the freedom to choose any possible mapping of iterations to
2999   // threads in the team.
3000   //
3001   // When schedule(runtime) is specified, the decision regarding scheduling is
3002   // deferred until run time, and the schedule and chunk size are taken from the
3003   // run-sched-var ICV. If the ICV is set to auto, the schedule is
3004   // implementation defined
3005   //
3006   // __kmpc_dispatch_init();
3007   // while(__kmpc_dispatch_next(&LB, &UB)) {
3008   //   idx = LB;
3009   //   while (idx <= UB) { BODY; ++idx;
3010   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
3011   //   } // inner loop
3012   // }
3013   // __kmpc_dispatch_deinit();
3014   //
3015   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3016   // When schedule(static, chunk_size) is specified, iterations are divided into
3017   // chunks of size chunk_size, and the chunks are assigned to the threads in
3018   // the team in a round-robin fashion in the order of the thread number.
3019   //
3020   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
3021   //   while (idx <= UB) { BODY; ++idx; } // inner loop
3022   //   LB = LB + ST;
3023   //   UB = UB + ST;
3024   // }
3025   //
3026 
3027   const Expr *IVExpr = S.getIterationVariable();
3028   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3029   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3030 
3031   if (DynamicOrOrdered) {
    // The dispatch runtime call takes values (not addresses), so the bounds
    // are first materialized by the directive-specific callback.
3032     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3033         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3034     llvm::Value *LBVal = DispatchBounds.first;
3035     llvm::Value *UBVal = DispatchBounds.second;
3036     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
3037                                                              LoopArgs.Chunk};
3038     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
3039                            IVSigned, Ordered, DipatchRTInputValues);
3040   } else {
3041     CGOpenMPRuntime::StaticRTInput StaticInit(
3042         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3043         LoopArgs.ST, LoopArgs.Chunk);
3044     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3045                          ScheduleKind, StaticInit);
3046   }
3047 
    // Per-iteration hook: for ordered loops, notify the runtime that the
    // current iteration's ordered region has finished.
3048   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3049                                     const unsigned IVSize,
3050                                     const bool IVSigned) {
3051     if (Ordered) {
3052       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3053                                                             IVSigned);
3054     }
3055   };
3056 
3057   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3058                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3059   OuterLoopArgs.IncExpr = S.getInc();
3060   OuterLoopArgs.Init = S.getInit();
3061   OuterLoopArgs.Cond = S.getCond();
3062   OuterLoopArgs.NextLB = S.getNextLowerBound();
3063   OuterLoopArgs.NextUB = S.getNextUpperBound();
3064   OuterLoopArgs.DKind = LoopArgs.DKind;
3065   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3066                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3067   if (DynamicOrOrdered) {
3068     RT.emitForDispatchDeinit(*this, S.getBeginLoc());
3069   }
3070 }
3071 
emitEmptyOrdered(CodeGenFunction &,SourceLocation Loc,const unsigned IVSize,const bool IVSigned)3072 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3073                              const unsigned IVSize, const bool IVSigned) {}
3074 
/// Emit the outer loop for a 'distribute' construct. Behaves like
/// EmitOMPForOuterLoop except that distribute schedules are always static,
/// and when 'distribute' is combined with a worksharing 'for' the combined
/// (Combined*) bound/init/cond expressions are used instead of the plain
/// ones.
EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,const OMPLoopArguments & LoopArgs,const CodeGenLoopTy & CodeGenLoopContent)3075 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3076     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3077     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3078     const CodeGenLoopTy &CodeGenLoopContent) {
3079 
3080   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3081 
3082   // Emit outer loop.
3083   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3084   // dynamic
3085   //
3086 
3087   const Expr *IVExpr = S.getIterationVariable();
3088   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3089   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3090 
3091   CGOpenMPRuntime::StaticRTInput StaticInit(
3092       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3093       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3094   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3095 
3096   // for combined 'distribute' and 'for' the increment expression of distribute
3097   // is stored in DistInc. For 'distribute' alone, it is in Inc.
3098   Expr *IncExpr;
3099   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3100     IncExpr = S.getDistInc();
3101   else
3102     IncExpr = S.getInc();
3103 
3104   // this routine is shared by 'omp distribute parallel for' and
3105   // 'omp distribute': select the right EUB expression depending on the
3106   // directive
3107   OMPLoopArguments OuterLoopArgs;
3108   OuterLoopArgs.LB = LoopArgs.LB;
3109   OuterLoopArgs.UB = LoopArgs.UB;
3110   OuterLoopArgs.ST = LoopArgs.ST;
3111   OuterLoopArgs.IL = LoopArgs.IL;
3112   OuterLoopArgs.Chunk = LoopArgs.Chunk;
3113   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3114                           ? S.getCombinedEnsureUpperBound()
3115                           : S.getEnsureUpperBound();
3116   OuterLoopArgs.IncExpr = IncExpr;
3117   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3118                            ? S.getCombinedInit()
3119                            : S.getInit();
3120   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3121                            ? S.getCombinedCond()
3122                            : S.getCond();
3123   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3124                              ? S.getCombinedNextLowerBound()
3125                              : S.getNextLowerBound();
3126   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3127                              ? S.getCombinedNextUpperBound()
3128                              : S.getNextUpperBound();
3129   OuterLoopArgs.DKind = OMPD_distribute;
3130 
3131   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3132                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
3133                    emitEmptyOrdered);
3134 }
3135 
/// Compute the inner-loop bounds for a 'for' combined with 'distribute':
/// the inner 'parallel for' iterates over the distribute chunk, so the
/// previous (distribute) LB/UB values are loaded, converted to the
/// iteration-variable type, and stored into the current LB/UB helper
/// variables. Returns the {LB, UB} lvalues.
3136 static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)3137 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3138                                      const OMPExecutableDirective &S) {
3139   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3140   LValue LB =
3141       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3142   LValue UB =
3143       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3144 
3145   // When composing 'distribute' with 'for' (e.g. as in 'distribute
3146   // parallel for') we need to use the 'distribute'
3147   // chunk lower and upper bounds rather than the whole loop iteration
3148   // space. These are parameters to the outlined function for 'parallel'
3149   // and we copy the bounds of the previous schedule into the
3150   // the current ones.
3151   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3152   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3153   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3154       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
    // The previous bounds may have a different integer type than the
    // iteration variable; convert before storing.
3155   PrevLBVal = CGF.EmitScalarConversion(
3156       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3157       LS.getIterationVariable()->getType(),
3158       LS.getPrevLowerBoundVariable()->getExprLoc());
3159   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3160       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3161   PrevUBVal = CGF.EmitScalarConversion(
3162       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3163       LS.getIterationVariable()->getType(),
3164       LS.getPrevUpperBoundVariable()->getExprLoc());
3165 
3166   CGF.EmitStoreOfScalar(PrevLBVal, LB);
3167   CGF.EmitStoreOfScalar(PrevUBVal, UB);
3168 
3169   return {LB, UB};
3170 }
3171 
3172 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3173 /// we need to use the LB and UB expressions generated by the worksharing
3174 /// code generation support, whereas in non combined situations we would
3175 /// just emit 0 and the LastIteration expression
3176 /// This function is necessary due to the difference of the LB and UB
3177 /// types for the RT emission routines for 'for_static_init' and
3178 /// 'for_dispatch_init'
3179 static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)3180 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3181                                         const OMPExecutableDirective &S,
3182                                         Address LB, Address UB) {
3183   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3184   const Expr *IVExpr = LS.getIterationVariable();
3185   // when implementing a dynamic schedule for a 'for' combined with a
3186   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3187   // is not normalized as each team only executes its own assigned
3188   // distribute chunk
3189   QualType IteratorTy = IVExpr->getType();
3190   llvm::Value *LBVal =
3191       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3192   llvm::Value *UBVal =
3193       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3194   return {LBVal, UBVal};
3195 }
3196 
emitDistributeParallelForDistributeInnerBoundParams(CodeGenFunction & CGF,const OMPExecutableDirective & S,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars)3197 static void emitDistributeParallelForDistributeInnerBoundParams(
3198     CodeGenFunction &CGF, const OMPExecutableDirective &S,
3199     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3200   const auto &Dir = cast<OMPLoopDirective>(S);
3201   LValue LB =
3202       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3203   llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3204       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
3205   CapturedVars.push_back(LBCast);
3206   LValue UB =
3207       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3208 
3209   llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3210       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
3211   CapturedVars.push_back(UBCast);
3212 }
3213 
/// CodeGenLoop callback used by 'distribute parallel for' (and its variants):
/// for each distribute chunk, emit an inner 'parallel for' region whose
/// bounds come from the enclosing distribute chunk, with a cancel region set
/// up when the concrete directive kind supports 'cancel'.
3214 static void
emitInnerParallelForWhenCombined(CodeGenFunction & CGF,const OMPLoopDirective & S,CodeGenFunction::JumpDest LoopExit)3215 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3216                                  const OMPLoopDirective &S,
3217                                  CodeGenFunction::JumpDest LoopExit) {
3218   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3219                                          PrePostActionTy &Action) {
3220     Action.Enter(CGF);
    // 'cancel' is only meaningful on the non-simd combined forms; probe the
    // concrete directive kinds that provide hasCancel().
3221     bool HasCancel = false;
3222     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3223       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3224         HasCancel = D->hasCancel();
3225       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3226         HasCancel = D->hasCancel();
3227       else if (const auto *D =
3228                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3229         HasCancel = D->hasCancel();
3230     }
3231     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3232                                                      HasCancel);
3233     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3234                                emitDistributeParallelForInnerBounds,
3235                                emitDistributeParallelForDispatchBounds);
3236   };
3237 
3238   emitCommonOMPParallelDirective(
3239       CGF, S,
3240       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3241       CGInlinedWorksharingLoop,
3242       emitDistributeParallelForDistributeInnerBoundParams);
3243 }
3244 
EmitOMPDistributeParallelForDirective(const OMPDistributeParallelForDirective & S)3245 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3246     const OMPDistributeParallelForDirective &S) {
3247   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3248     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3249                               S.getDistInc());
3250   };
3251   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3252   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3253 }
3254 
EmitOMPDistributeParallelForSimdDirective(const OMPDistributeParallelForSimdDirective & S)3255 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3256     const OMPDistributeParallelForSimdDirective &S) {
3257   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3258     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3259                               S.getDistInc());
3260   };
3261   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3262   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3263 }
3264 
EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective & S)3265 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3266     const OMPDistributeSimdDirective &S) {
3267   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3268     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3269   };
3270   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3271   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3272 }
3273 
EmitOMPTargetSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetSimdDirective & S)3274 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3275     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3276   // Emit SPMD target parallel for region as a standalone region.
3277   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3278     emitOMPSimdRegion(CGF, S, Action);
3279   };
3280   llvm::Function *Fn;
3281   llvm::Constant *Addr;
3282   // Emit target region as a standalone region.
3283   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3284       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3285   assert(Fn && Addr && "Target device function emission failed.");
3286 }
3287 
EmitOMPTargetSimdDirective(const OMPTargetSimdDirective & S)3288 void CodeGenFunction::EmitOMPTargetSimdDirective(
3289     const OMPTargetSimdDirective &S) {
3290   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3291     emitOMPSimdRegion(CGF, S, Action);
3292   };
3293   emitCommonOMPTargetDirective(*this, S, CodeGen);
3294 }
3295 
namespace {
/// Bundles the schedule kind of an OpenMP 'schedule' clause together with its
/// (up to two) modifiers, e.g. monotonic/nonmonotonic.
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;   // Schedule kind (static, dynamic, ...).
  OpenMPScheduleClauseModifier M1; // First schedule modifier, if any.
  OpenMPScheduleClauseModifier M2; // Second schedule modifier, if any.
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3307 
/// Emits a worksharing loop ('for', 'for simd', and the worksharing piece of
/// combined directives).
///
/// \param S The loop directive being lowered.
/// \param EUB The ensure-upper-bound expression, forwarded to the outer-loop
///        path for dispatch schedules.
/// \param CodeGenLoopBounds Callback that materializes the LB/UB helper vars.
/// \param CGDispatchBounds Callback that produces bounds for dispatch
///        schedules.
/// \returns true if the directive has a lastprivate clause; false also when
///          the precondition constant-folds to false and the loop is elided.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Set inside the precondition scope below; returned to the caller.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    // NOTE(review): EmittedFinals appears unused in this function — confirm
    // whether it is leftover from an earlier revision.
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        // A compile-time chunk of 1 enables the combined static-chunked-one
        // lowering below.
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        // Static (non-chunked, or chunk==1 on a bound-sharing directive),
        // unordered: one static-init call and a single inner loop suffice.
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         OMPD_for);
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3549 
3550 /// The following two functions generate expressions for the loop lower
3551 /// and upper bounds in case of static and dynamic (dispatch) schedule
3552 /// of the associated 'for' or 'distribute' loop.
3553 static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)3554 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3555   const auto &LS = cast<OMPLoopDirective>(S);
3556   LValue LB =
3557       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3558   LValue UB =
3559       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3560   return {LB, UB};
3561 }
3562 
3563 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3564 /// consider the lower and upper bound expressions generated by the
3565 /// worksharing loop support, but we use 0 and the iteration space size as
3566 /// constants
3567 static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)3568 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3569                           Address LB, Address UB) {
3570   const auto &LS = cast<OMPLoopDirective>(S);
3571   const Expr *IVExpr = LS.getIterationVariable();
3572   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3573   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3574   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3575   return {LBVal, UBVal};
3576 }
3577 
3578 /// Emits internal temp array declarations for the directive with inscan
3579 /// reductions.
3580 /// The code is the following:
3581 /// \code
3582 /// size num_iters = <num_iters>;
3583 /// <type> buffer[num_iters];
3584 /// \endcode
emitScanBasedDirectiveDecls(CodeGenFunction & CGF,const OMPLoopDirective & S,llvm::function_ref<llvm::Value * (CodeGenFunction &)> NumIteratorsGen)3585 static void emitScanBasedDirectiveDecls(
3586     CodeGenFunction &CGF, const OMPLoopDirective &S,
3587     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3588   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3589       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3590   SmallVector<const Expr *, 4> Shareds;
3591   SmallVector<const Expr *, 4> Privates;
3592   SmallVector<const Expr *, 4> ReductionOps;
3593   SmallVector<const Expr *, 4> CopyArrayTemps;
3594   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3595     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3596            "Only inscan reductions are expected.");
3597     Shareds.append(C->varlist_begin(), C->varlist_end());
3598     Privates.append(C->privates().begin(), C->privates().end());
3599     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3600     CopyArrayTemps.append(C->copy_array_temps().begin(),
3601                           C->copy_array_temps().end());
3602   }
3603   {
3604     // Emit buffers for each reduction variables.
3605     // ReductionCodeGen is required to emit correctly the code for array
3606     // reductions.
3607     ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3608     unsigned Count = 0;
3609     auto *ITA = CopyArrayTemps.begin();
3610     for (const Expr *IRef : Privates) {
3611       const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3612       // Emit variably modified arrays, used for arrays/array sections
3613       // reductions.
3614       if (PrivateVD->getType()->isVariablyModifiedType()) {
3615         RedCG.emitSharedOrigLValue(CGF, Count);
3616         RedCG.emitAggregateType(CGF, Count);
3617       }
3618       CodeGenFunction::OpaqueValueMapping DimMapping(
3619           CGF,
3620           cast<OpaqueValueExpr>(
3621               cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3622                   ->getSizeExpr()),
3623           RValue::get(OMPScanNumIterations));
3624       // Emit temp buffer.
3625       CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3626       ++ITA;
3627       ++Count;
3628     }
3629   }
3630 }
3631 
3632 /// Copies final inscan reductions values to the original variables.
3633 /// The code is the following:
3634 /// \code
3635 /// <orig_var> = buffer[num_iters-1];
3636 /// \endcode
emitScanBasedDirectiveFinals(CodeGenFunction & CGF,const OMPLoopDirective & S,llvm::function_ref<llvm::Value * (CodeGenFunction &)> NumIteratorsGen)3637 static void emitScanBasedDirectiveFinals(
3638     CodeGenFunction &CGF, const OMPLoopDirective &S,
3639     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3640   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3641       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3642   SmallVector<const Expr *, 4> Shareds;
3643   SmallVector<const Expr *, 4> LHSs;
3644   SmallVector<const Expr *, 4> RHSs;
3645   SmallVector<const Expr *, 4> Privates;
3646   SmallVector<const Expr *, 4> CopyOps;
3647   SmallVector<const Expr *, 4> CopyArrayElems;
3648   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3649     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3650            "Only inscan reductions are expected.");
3651     Shareds.append(C->varlist_begin(), C->varlist_end());
3652     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3653     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3654     Privates.append(C->privates().begin(), C->privates().end());
3655     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3656     CopyArrayElems.append(C->copy_array_elems().begin(),
3657                           C->copy_array_elems().end());
3658   }
3659   // Create temp var and copy LHS value to this temp value.
3660   // LHS = TMP[LastIter];
3661   llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3662       OMPScanNumIterations,
3663       llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3664   for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3665     const Expr *PrivateExpr = Privates[I];
3666     const Expr *OrigExpr = Shareds[I];
3667     const Expr *CopyArrayElem = CopyArrayElems[I];
3668     CodeGenFunction::OpaqueValueMapping IdxMapping(
3669         CGF,
3670         cast<OpaqueValueExpr>(
3671             cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3672         RValue::get(OMPLast));
3673     LValue DestLVal = CGF.EmitLValue(OrigExpr);
3674     LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3675     CGF.EmitOMPCopy(
3676         PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
3677         cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3678         cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
3679   }
3680 }
3681 
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
///   buffer[i] op= buffer[i-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  // Iteration count, cast (unsigned) to size_t.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Gather the expression lists from every inscan reduction clause.
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  // The prefix-reduction tree over the temp buffers, built with explicit
  // basic blocks and PHI nodes.
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k <= ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    // Trip count of the outer loop: ceil(log2(n)), computed via the llvm.log2
    // and llvm.ceil intrinsics on double.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      // Privatize the LHS/RHS reduction variables so they address buffer[i]
      // and buffer[i - pow2k] respectively, then emit the combiner.
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  // Inside a parallel region the tree reduction runs in a master region
  // followed by a barrier; otherwise it is emitted directly.
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  // Second pass: the scan phase reads the prefix results from the buffers.
  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
3841 
emitWorksharingDirective(CodeGenFunction & CGF,const OMPLoopDirective & S,bool HasCancel)3842 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3843                                      const OMPLoopDirective &S,
3844                                      bool HasCancel) {
3845   bool HasLastprivates;
3846   if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3847                    [](const OMPReductionClause *C) {
3848                      return C->getModifier() == OMPC_REDUCTION_inscan;
3849                    })) {
3850     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3851       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3852       OMPLoopScope LoopScope(CGF, S);
3853       return CGF.EmitScalarExpr(S.getNumIterations());
3854     };
3855     const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3856       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3857           CGF, S.getDirectiveKind(), HasCancel);
3858       (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3859                                        emitForLoopBounds,
3860                                        emitDispatchForLoopBounds);
3861       // Emit an implicit barrier at the end.
3862       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3863                                                  OMPD_for);
3864     };
3865     const auto &&SecondGen = [&S, HasCancel,
3866                               &HasLastprivates](CodeGenFunction &CGF) {
3867       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3868           CGF, S.getDirectiveKind(), HasCancel);
3869       HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3870                                                    emitForLoopBounds,
3871                                                    emitDispatchForLoopBounds);
3872     };
3873     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3874       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3875     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3876     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3877       emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
3878   } else {
3879     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3880                                                      HasCancel);
3881     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3882                                                  emitForLoopBounds,
3883                                                  emitDispatchForLoopBounds);
3884   }
3885   return HasLastprivates;
3886 }
3887 
isSupportedByOpenMPIRBuilder(const OMPForDirective & S)3888 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3889   if (S.hasCancel())
3890     return false;
3891   for (OMPClause *C : S.clauses()) {
3892     if (isa<OMPNowaitClause>(C))
3893       continue;
3894 
3895     if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3896       if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3897         return false;
3898       if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3899         return false;
3900       switch (SC->getScheduleKind()) {
3901       case OMPC_SCHEDULE_auto:
3902       case OMPC_SCHEDULE_dynamic:
3903       case OMPC_SCHEDULE_runtime:
3904       case OMPC_SCHEDULE_guided:
3905       case OMPC_SCHEDULE_static:
3906         continue;
3907       case OMPC_SCHEDULE_unknown:
3908         return false;
3909       }
3910     }
3911 
3912     return false;
3913   }
3914 
3915   return true;
3916 }
3917 
3918 static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind)3919 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3920   switch (ScheduleClauseKind) {
3921   case OMPC_SCHEDULE_unknown:
3922     return llvm::omp::OMP_SCHEDULE_Default;
3923   case OMPC_SCHEDULE_auto:
3924     return llvm::omp::OMP_SCHEDULE_Auto;
3925   case OMPC_SCHEDULE_dynamic:
3926     return llvm::omp::OMP_SCHEDULE_Dynamic;
3927   case OMPC_SCHEDULE_guided:
3928     return llvm::omp::OMP_SCHEDULE_Guided;
3929   case OMPC_SCHEDULE_runtime:
3930     return llvm::omp::OMP_SCHEDULE_Runtime;
3931   case OMPC_SCHEDULE_static:
3932     return llvm::omp::OMP_SCHEDULE_Static;
3933   }
3934   llvm_unreachable("Unhandled schedule kind");
3935 }
3936 
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  // The OpenMPIRBuilder path is taken only when it is enabled and the
  // directive uses no features the builder cannot handle yet (see
  // isSupportedByOpenMPIRBuilder above).
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // A trailing barrier is needed unless 'nowait' was specified.
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      // Translate an optional 'schedule' clause into the builder's schedule
      // kind and chunk-size operand.
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      // Apply the worksharing-loop transformation to the canonical loop.
      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.applyWorkshareLoop(
          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false);
      return;
    }

    // Legacy codegen path; also records whether lastprivates were emitted so
    // the barrier decision below can account for them.
    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // On the OpenMPIRBuilder path the barrier is handled by applyWorkshareLoop.
  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3991 
EmitOMPForSimdDirective(const OMPForSimdDirective & S)3992 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3993   bool HasLastprivates = false;
3994   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3995                                           PrePostActionTy &) {
3996     HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3997   };
3998   {
3999     auto LPCRegion =
4000         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4001     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4002     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
4003   }
4004 
4005   // Emit an implicit barrier at the end.
4006   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4007     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
4008   // Check for outer lastprivate conditional update.
4009   checkForLastprivateConditionalUpdate(*this, S);
4010 }
4011 
createSectionLVal(CodeGenFunction & CGF,QualType Ty,const Twine & Name,llvm::Value * Init=nullptr)4012 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
4013                                 const Twine &Name,
4014                                 llvm::Value *Init = nullptr) {
4015   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
4016   if (Init)
4017     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
4018   return LVal;
4019 }
4020 
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  // Lower 'sections' as a statically-scheduled worksharing loop over the
  // section indices [0, NumSections), dispatching to each section body via a
  // switch on the loop counter.
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // A compound statement contributes one section per child; any other
  // statement forms a single implicit section (CS == nullptr below).
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    // Lower bound, upper bound, stride and "is-last-iteration" flag for the
    // runtime static-init call, plus the loop counter IV.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: NumSections - 1 (0 for a single implicit section).
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Bind opaque AST expressions to IV and UB so the condition and increment
    // below can be emitted through the regular expression codegen.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop: IV <= UB.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter: ++IV.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single implicit section: the whole captured statement is case 0.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     OMPD_sections);
    };
    // Emit the static-finish through the cancel stack so cancellation exits
    // still release the runtime loop.
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  // Cancellation is only possible for plain 'sections' and
  // 'parallel sections'.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
4166 
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    // OpenMPIRBuilder path: build one body-generation callback per section
    // and hand them to OMPIRBuilder::createSections.
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      // One callback per child statement of the compound statement.
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, SubStmt, AllocaIP, CodeGenIP, "section");
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      // The whole captured statement forms a single implicit section.
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  // Legacy path: lower through EmitSections (static worksharing loop with a
  // switch over section bodies).
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4235 
EmitOMPSectionDirective(const OMPSectionDirective & S)4236 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4237   if (CGM.getLangOpts().OpenMPIRBuilder) {
4238     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4239     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4240 
4241     const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4242     auto FiniCB = [this](InsertPointTy IP) {
4243       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4244     };
4245 
4246     auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4247                                                    InsertPointTy CodeGenIP) {
4248       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4249           *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4250     };
4251 
4252     LexicalScope Scope(*this, S.getSourceRange());
4253     EmitStopPoint(&S);
4254     Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4255 
4256     return;
4257   }
4258   LexicalScope Scope(*this, S.getSourceRange());
4259   EmitStopPoint(&S);
4260   EmitStmt(S.getAssociatedStmt());
4261 }
4262 
EmitOMPSingleDirective(const OMPSingleDirective & S)4263 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4264   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4265   llvm::SmallVector<const Expr *, 8> DestExprs;
4266   llvm::SmallVector<const Expr *, 8> SrcExprs;
4267   llvm::SmallVector<const Expr *, 8> AssignmentOps;
4268   // Check if there are any 'copyprivate' clauses associated with this
4269   // 'single' construct.
4270   // Build a list of copyprivate variables along with helper expressions
4271   // (<source>, <destination>, <destination>=<source> expressions)
4272   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4273     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4274     DestExprs.append(C->destination_exprs().begin(),
4275                      C->destination_exprs().end());
4276     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4277     AssignmentOps.append(C->assignment_ops().begin(),
4278                          C->assignment_ops().end());
4279   }
4280   // Emit code for 'single' region along with 'copyprivate' clauses
4281   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4282     Action.Enter(CGF);
4283     OMPPrivateScope SingleScope(CGF);
4284     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4285     CGF.EmitOMPPrivateClause(S, SingleScope);
4286     (void)SingleScope.Privatize();
4287     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4288   };
4289   {
4290     auto LPCRegion =
4291         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4292     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4293     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4294                                             CopyprivateVars, DestExprs,
4295                                             SrcExprs, AssignmentOps);
4296   }
4297   // Emit an implicit barrier at the end (to avoid data race on firstprivate
4298   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4299   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4300     CGM.getOpenMPRuntime().emitBarrierCall(
4301         *this, S.getBeginLoc(),
4302         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4303   }
4304   // Check for outer lastprivate conditional update.
4305   checkForLastprivateConditionalUpdate(*this, S);
4306 }
4307 
emitMaster(CodeGenFunction & CGF,const OMPExecutableDirective & S)4308 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4309   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4310     Action.Enter(CGF);
4311     CGF.EmitStmt(S.getRawStmt());
4312   };
4313   CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4314 }
4315 
EmitOMPMasterDirective(const OMPMasterDirective & S)4316 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4317   if (CGM.getLangOpts().OpenMPIRBuilder) {
4318     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4319     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4320 
4321     const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4322 
4323     auto FiniCB = [this](InsertPointTy IP) {
4324       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4325     };
4326 
4327     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4328                                                   InsertPointTy CodeGenIP) {
4329       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4330           *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
4331     };
4332 
4333     LexicalScope Scope(*this, S.getSourceRange());
4334     EmitStopPoint(&S);
4335     Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4336 
4337     return;
4338   }
4339   LexicalScope Scope(*this, S.getSourceRange());
4340   EmitStopPoint(&S);
4341   emitMaster(*this, S);
4342 }
4343 
emitMasked(CodeGenFunction & CGF,const OMPExecutableDirective & S)4344 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4345   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4346     Action.Enter(CGF);
4347     CGF.EmitStmt(S.getRawStmt());
4348   };
4349   Expr *Filter = nullptr;
4350   if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4351     Filter = FilterClause->getThreadID();
4352   CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4353                                               Filter);
4354 }
4355 
EmitOMPMaskedDirective(const OMPMaskedDirective & S)4356 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4357   if (CGM.getLangOpts().OpenMPIRBuilder) {
4358     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4359     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4360 
4361     const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4362     const Expr *Filter = nullptr;
4363     if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4364       Filter = FilterClause->getThreadID();
4365     llvm::Value *FilterVal = Filter
4366                                  ? EmitScalarExpr(Filter, CGM.Int32Ty)
4367                                  : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4368 
4369     auto FiniCB = [this](InsertPointTy IP) {
4370       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4371     };
4372 
4373     auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4374                                                   InsertPointTy CodeGenIP) {
4375       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4376           *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
4377     };
4378 
4379     LexicalScope Scope(*this, S.getSourceRange());
4380     EmitStopPoint(&S);
4381     Builder.restoreIP(
4382         OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4383 
4384     return;
4385   }
4386   LexicalScope Scope(*this, S.getSourceRange());
4387   EmitStopPoint(&S);
4388   emitMasked(*this, S);
4389 }
4390 
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    // OpenMPIRBuilder path: hand the body, the critical-section name, and an
    // optional 'hint' value to createCritical.
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  // Legacy path: emit the body through the runtime's critical-region helper,
  // keyed by the directive's name and optional hint.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}
4441 
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    // Evaluates the directive's iteration count; used only when an 'inscan'
    // reduction is present.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    // 'inscan' reductions need extra declarations emitted before the parallel
    // region and finalization after it.
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4475 
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    // 'parallel for simd' does not support cancellation.
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    // Evaluates the directive's iteration count; used only when an 'inscan'
    // reduction is present.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    // 'inscan' reductions need extra declarations emitted before the parallel
    // region and finalization after it.
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4509 
EmitOMPParallelMasterDirective(const OMPParallelMasterDirective & S)4510 void CodeGenFunction::EmitOMPParallelMasterDirective(
4511     const OMPParallelMasterDirective &S) {
4512   // Emit directive as a combined directive that consists of two implicit
4513   // directives: 'parallel' with 'master' directive.
4514   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4515     Action.Enter(CGF);
4516     OMPPrivateScope PrivateScope(CGF);
4517     emitOMPCopyinClause(CGF, S);
4518     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4519     CGF.EmitOMPPrivateClause(S, PrivateScope);
4520     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4521     (void)PrivateScope.Privatize();
4522     emitMaster(CGF, S);
4523     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4524   };
4525   {
4526     auto LPCRegion =
4527         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4528     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4529                                    emitEmptyBoundParameters);
4530     emitPostUpdateForReductionClause(*this, S,
4531                                      [](CodeGenFunction &) { return nullptr; });
4532   }
4533   // Check for outer lastprivate conditional update.
4534   checkForLastprivateConditionalUpdate(*this, S);
4535 }
4536 
EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective & S)4537 void CodeGenFunction::EmitOMPParallelMaskedDirective(
4538     const OMPParallelMaskedDirective &S) {
4539   // Emit directive as a combined directive that consists of two implicit
4540   // directives: 'parallel' with 'masked' directive.
4541   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4542     Action.Enter(CGF);
4543     OMPPrivateScope PrivateScope(CGF);
4544     emitOMPCopyinClause(CGF, S);
4545     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4546     CGF.EmitOMPPrivateClause(S, PrivateScope);
4547     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4548     (void)PrivateScope.Privatize();
4549     emitMasked(CGF, S);
4550     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4551   };
4552   {
4553     auto LPCRegion =
4554         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4555     emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
4556                                    emitEmptyBoundParameters);
4557     emitPostUpdateForReductionClause(*this, S,
4558                                      [](CodeGenFunction &) { return nullptr; });
4559   }
4560   // Check for outer lastprivate conditional update.
4561   checkForLastprivateConditionalUpdate(*this, S);
4562 }
4563 
EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective & S)4564 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4565     const OMPParallelSectionsDirective &S) {
4566   // Emit directive as a combined directive that consists of two implicit
4567   // directives: 'parallel' with 'sections' directive.
4568   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4569     Action.Enter(CGF);
4570     emitOMPCopyinClause(CGF, S);
4571     CGF.EmitSections(S);
4572   };
4573   {
4574     auto LPCRegion =
4575         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4576     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4577                                    emitEmptyBoundParameters);
4578   }
4579   // Check for outer lastprivate conditional update.
4580   checkForLastprivateConditionalUpdate(*this, S);
4581 }
4582 
4583 namespace {
4584 /// Get the list of variables declared in the context of the untied tasks.
4585 class CheckVarsEscapingUntiedTaskDeclContext final
4586     : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4587   llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4588 
4589 public:
4590   explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4591   virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
VisitDeclStmt(const DeclStmt * S)4592   void VisitDeclStmt(const DeclStmt *S) {
4593     if (!S)
4594       return;
4595     // Need to privatize only local vars, static locals can be processed as is.
4596     for (const Decl *D : S->decls()) {
4597       if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4598         if (VD->hasLocalStorage())
4599           PrivateDecls.push_back(VD);
4600     }
4601   }
VisitOMPExecutableDirective(const OMPExecutableDirective *)4602   void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
VisitCapturedStmt(const CapturedStmt *)4603   void VisitCapturedStmt(const CapturedStmt *) {}
VisitLambdaExpr(const LambdaExpr *)4604   void VisitLambdaExpr(const LambdaExpr *) {}
VisitBlockExpr(const BlockExpr *)4605   void VisitBlockExpr(const BlockExpr *) {}
VisitStmt(const Stmt * S)4606   void VisitStmt(const Stmt *S) {
4607     if (!S)
4608       return;
4609     for (const Stmt *Child : S->children())
4610       if (Child)
4611         Visit(Child);
4612   }
4613 
4614   /// Swaps list of vars with the provided one.
getPrivateDecls() const4615   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4616 };
4617 } // anonymous namespace
4618 
buildDependences(const OMPExecutableDirective & S,OMPTaskDataTy & Data)4619 static void buildDependences(const OMPExecutableDirective &S,
4620                              OMPTaskDataTy &Data) {
4621 
4622   // First look for 'omp_all_memory' and add this first.
4623   bool OmpAllMemory = false;
4624   if (llvm::any_of(
4625           S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4626             return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4627                    C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4628           })) {
4629     OmpAllMemory = true;
4630     // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4631     // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4632     // simplify.
4633     OMPTaskDataTy::DependData &DD =
4634         Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4635                                       /*IteratorExpr=*/nullptr);
4636     // Add a nullptr Expr to simplify the codegen in emitDependData.
4637     DD.DepExprs.push_back(nullptr);
4638   }
4639   // Add remaining dependences skipping any 'out' or 'inout' if they are
4640   // overridden by 'omp_all_memory'.
4641   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4642     OpenMPDependClauseKind Kind = C->getDependencyKind();
4643     if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4644       continue;
4645     if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4646       continue;
4647     OMPTaskDataTy::DependData &DD =
4648         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4649     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4650   }
4651 }
4652 
// Common codegen for task-based directives (task, taskloop, target task
// wrappers): gathers clause information into \p Data, builds the outlined
// task function around \p BodyGen, and finally calls \p TaskGen to emit the
// actual runtime task creation for the region kind \p CapturedRegion.
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  // PartId is the second captured parameter, TaskT the fifth.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The runtime expects the priority as a 32-bit signed integer.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables. A variable is recorded only once even if
  // it appears in multiple clauses (tracked via EmittedAsPrivate).
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Remember destination decl -> original reference for the final copy.
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  // Collect reduction clause data and set up the task reduction descriptor.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  buildDependences(S, Data);
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  // Body of the outlined task function: remap all privatized variables to
  // the task-allocated copies, then run BodyGen.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of structure.
        // So the address of each shared variable can be computed by adding
        // offset of it (within record) to the base address of record. For each
        // shared variable, debug intrinsic llvm.dbg.declare is generated with
        // appropriate expressions (DIExpression).
        // Ex:
        //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar1,
        //            metadata !DIExpression(DW_OP_deref))
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar2,
        //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(CaptureRecord);
          unsigned Offset =
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
                                                CGF.Builder, false);
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add offset to base address.
          auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
                               unsigned Offset) {
            SmallVector<uint64_t, 8> Ops;
            // Add offset to the base address if non zero.
            if (Offset) {
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Offset);
            }
            Ops.push_back(llvm::dwarf::DW_OP_deref);
            Declare->setExpression(llvm::DIExpression::get(Ctx, Ops));
          };
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last))
            UpdateExpr(DDI->getContext(), DDI, Offset);
          // If we're emitting using the new debug info format into a block
          // without a terminator, the record will be "trailing".
          assert(!Last.isTerminator() && "unexpected terminator");
          if (auto *Marker =
                  CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
            // Only the most recently appended record needs updating; stop
            // after the first one seen in reverse order.
            for (llvm::DbgVariableRecord &DVR : llvm::reverse(
                     llvm::filterDbgVars(Marker->getDbgRecordRange()))) {
              UpdateExpr(Last.getContext(), &DVR, Offset);
              break;
            }
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates. Build one out-pointer temp per privatized variable and
      // call the copy function, which fills the temps with the addresses of
      // the task-allocated private copies.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If key exists update in place.
        if (Result.second == false)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      // Map lastprivate destination decls to the addresses of the originals.
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
      }
      // Load the private-copy addresses filled in by the copy function and
      // register them in the privatization scope.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement = Address(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Pair.first, Pair.second.getBasePointer(), CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        QualType VDType = Pair.first->getType().getNonReferenceType();
        if (Pair.first->getType()->isLValueReferenceType())
          VDType = CGF.getContext().getPointerType(VDType);
        if (isAllocatableDecl(Pair.first)) {
          // Allocatables carry an extra level of indirection: load the pointer
          // to the allocation, then the allocation itself.
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(
              Ptr,
              CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
              CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
                                CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
                              CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      // Firstprivates must already be usable while emitting the reduction
      // items, so privatize them in a temporary inner scope first.
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first, Replacement);
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      // Load the reduction descriptor from the 10th captured parameter.
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        // The runtime returns a void*; convert it to a pointer to the private
        // copy's type before registering the replacement address.
        Replacement = Address(
            CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
                                     CGF.getContext().VoidPtrTy,
                                     CGF.getContext().getPointerType(
                                         Data.ReductionCopies[Cnt]->getType()),
                                     Data.ReductionCopies[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    // Gather in_reduction clause items (vars, private copies, reduction ops,
    // taskgroup descriptors) in parallel.
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  // Outline the task body and hand off to TaskGen, which emits the runtime
  // call that creates and (possibly) schedules the task.
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, std::nullopt,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}
5039 
/// Creates an implicit firstprivate variable of type \p Ty for the captured
/// region \p CD and registers it in \p Data. Three implicit parameter decls
/// are created: the original variable, its private copy, and the variable
/// used to initialize the copy (of the base element type, to support array
/// types). The private copy is given an lvalue-to-rvalue init from the init
/// variable. Returns the decl of the original variable.
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  // The "original" variable and a reference to it.
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamKind::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  // The private copy and a reference to it.
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamKind::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  // The init variable uses the base element type so per-element
  // initialization works for constant array types.
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamKind::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  // The private copy is copy-initialized from the init variable.
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  // Register the triple as an implicit firstprivate in the task data.
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}
5069 
EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective & S,const RegionCodeGenTy & BodyGen,OMPTargetDataInfo & InputInfo)5070 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5071     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5072     OMPTargetDataInfo &InputInfo) {
5073   // Emit outlined function for task construct.
5074   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5075   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5076   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5077   auto I = CS->getCapturedDecl()->param_begin();
5078   auto PartId = std::next(I);
5079   auto TaskT = std::next(I, 4);
5080   OMPTaskDataTy Data;
5081   // The task is not final.
5082   Data.Final.setInt(/*IntVal=*/false);
5083   // Get list of firstprivate variables.
5084   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5085     auto IRef = C->varlist_begin();
5086     auto IElemInitRef = C->inits().begin();
5087     for (auto *IInit : C->private_copies()) {
5088       Data.FirstprivateVars.push_back(*IRef);
5089       Data.FirstprivateCopies.push_back(IInit);
5090       Data.FirstprivateInits.push_back(*IElemInitRef);
5091       ++IRef;
5092       ++IElemInitRef;
5093     }
5094   }
5095   SmallVector<const Expr *, 4> LHSs;
5096   SmallVector<const Expr *, 4> RHSs;
5097   for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5098     Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5099     Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5100     Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5101     Data.ReductionOps.append(C->reduction_ops().begin(),
5102                              C->reduction_ops().end());
5103     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5104     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5105   }
5106   OMPPrivateScope TargetScope(*this);
5107   VarDecl *BPVD = nullptr;
5108   VarDecl *PVD = nullptr;
5109   VarDecl *SVD = nullptr;
5110   VarDecl *MVD = nullptr;
5111   if (InputInfo.NumberOfTargetItems > 0) {
5112     auto *CD = CapturedDecl::Create(
5113         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5114     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5115     QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5116         getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal,
5117         /*IndexTypeQuals=*/0);
5118     BPVD = createImplicitFirstprivateForType(
5119         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5120     PVD = createImplicitFirstprivateForType(
5121         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5122     QualType SizesType = getContext().getConstantArrayType(
5123         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5124         ArrSize, nullptr, ArraySizeModifier::Normal,
5125         /*IndexTypeQuals=*/0);
5126     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
5127                                             S.getBeginLoc());
5128     TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
5129     TargetScope.addPrivate(PVD, InputInfo.PointersArray);
5130     TargetScope.addPrivate(SVD, InputInfo.SizesArray);
5131     // If there is no user-defined mapper, the mapper array will be nullptr. In
5132     // this case, we don't need to privatize it.
5133     if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5134             InputInfo.MappersArray.emitRawPointer(*this))) {
5135       MVD = createImplicitFirstprivateForType(
5136           getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5137       TargetScope.addPrivate(MVD, InputInfo.MappersArray);
5138     }
5139   }
5140   (void)TargetScope.Privatize();
5141   buildDependences(S, Data);
5142   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5143                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5144     // Set proper addresses for generated private copies.
5145     OMPPrivateScope Scope(CGF);
5146     if (!Data.FirstprivateVars.empty()) {
5147       enum { PrivatesParam = 2, CopyFnParam = 3 };
5148       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5149           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5150       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5151           CS->getCapturedDecl()->getParam(PrivatesParam)));
5152       // Map privates.
5153       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5154       llvm::SmallVector<llvm::Value *, 16> CallArgs;
5155       llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5156       CallArgs.push_back(PrivatesPtr);
5157       ParamTypes.push_back(PrivatesPtr->getType());
5158       for (const Expr *E : Data.FirstprivateVars) {
5159         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5160         RawAddress PrivatePtr =
5161             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
5162                               ".firstpriv.ptr.addr");
5163         PrivatePtrs.emplace_back(VD, PrivatePtr);
5164         CallArgs.push_back(PrivatePtr.getPointer());
5165         ParamTypes.push_back(PrivatePtr.getType());
5166       }
5167       auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5168                                                ParamTypes, /*isVarArg=*/false);
5169       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5170           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
5171       for (const auto &Pair : PrivatePtrs) {
5172         Address Replacement(
5173             CGF.Builder.CreateLoad(Pair.second),
5174             CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5175             CGF.getContext().getDeclAlign(Pair.first));
5176         Scope.addPrivate(Pair.first, Replacement);
5177       }
5178     }
5179     CGF.processInReduction(S, Data, CGF, CS, Scope);
5180     if (InputInfo.NumberOfTargetItems > 0) {
5181       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5182           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
5183       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5184           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
5185       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5186           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
5187       // If MVD is nullptr, the mapper array is not privatized
5188       if (MVD)
5189         InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5190             CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
5191     }
5192 
5193     Action.Enter(CGF);
5194     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5195     auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5196     if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5197         needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) {
5198       // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5199       // enclosing this target region. This will indirectly set the thread_limit
5200       // for every applicable construct within target region.
5201       CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5202           CGF, TL->getThreadLimit(), S.getBeginLoc());
5203     }
5204     BodyGen(CGF);
5205   };
5206   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5207       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
5208       Data.NumberOfParts);
5209   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5210   IntegerLiteral IfCond(getContext(), TrueOrFalse,
5211                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5212                         SourceLocation());
5213   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5214                                       SharedsTy, CapturedStruct, &IfCond, Data);
5215 }
5216 
/// Remaps reduction variables to their task-specific copies before the body
/// of a reduction-carrying task is emitted.
///
/// Two sources are handled in order:
///  1. Task reductions recorded in \p Data (Data.Reductions): each variable
///     is replaced in \p Scope by the item obtained from the runtime's
///     task-reduction lookup.
///  2. 'in_reduction' clauses on \p S: the listed variables are privatized
///     the same way in a second, function-local scope.
/// Both scopes are finalized (Privatize()) here.
void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                         OMPTaskDataTy &Data,
                                         CodeGenFunction &CGF,
                                         const CapturedStmt *CS,
                                         OMPPrivateScope &Scope) {
  if (Data.Reductions) {
    OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
    OMPLexicalScope LexScope(CGF, S, CapturedRegion);
    ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                           Data.ReductionCopies, Data.ReductionOps);
    // NOTE(review): assumes parameter 4 of the captured task function is the
    // task-reduction descriptor set up by the task outlining code — confirm
    // against the task-based directive emission.
    llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
        CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
    for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      // Ask the runtime for the address of this task's copy of the item.
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      // The runtime hands back a void*; convert it to a pointer to the
      // private copy's type, keeping the returned alignment.
      Replacement = Address(
          CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
                                   CGF.getContext().VoidPtrTy,
                                   CGF.getContext().getPointerType(
                                       Data.ReductionCopies[Cnt]->getType()),
                                   Data.ReductionCopies[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)Scope.Privatize();
  // Collect the variables, private copies, reduction ops and taskgroup
  // descriptors from all 'in_reduction' clauses on the directive. The four
  // clause iterators advance in lockstep with the variable list.
  SmallVector<const Expr *, 4> InRedVars;
  SmallVector<const Expr *, 4> InRedPrivs;
  SmallVector<const Expr *, 4> InRedOps;
  SmallVector<const Expr *, 4> TaskgroupDescriptors;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ITD = C->taskgroup_descriptors().begin();
    for (const Expr *Ref : C->varlists()) {
      InRedVars.emplace_back(Ref);
      InRedPrivs.emplace_back(*IPriv);
      InRedOps.emplace_back(*IRed);
      TaskgroupDescriptors.emplace_back(*ITD);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ITD, 1);
    }
  }
  OMPPrivateScope InRedScope(CGF);
  if (!InRedVars.empty()) {
    ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
    for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      // Load the taskgroup descriptor when the clause recorded one for this
      // item; otherwise pass a null pointer to the runtime lookup.
      llvm::Value *ReductionsPtr;
      if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
        ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
      } else {
        ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      }
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      // Same void* -> typed-pointer conversion as for task reductions above.
      Replacement = Address(
          CGF.EmitScalarConversion(
              Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
              CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
              InRedPrivs[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)InRedScope.Privatize();
}
5303 
EmitOMPTaskDirective(const OMPTaskDirective & S)5304 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5305   // Emit outlined function for task construct.
5306   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5307   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5308   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5309   const Expr *IfCond = nullptr;
5310   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5311     if (C->getNameModifier() == OMPD_unknown ||
5312         C->getNameModifier() == OMPD_task) {
5313       IfCond = C->getCondition();
5314       break;
5315     }
5316   }
5317 
5318   OMPTaskDataTy Data;
5319   // Check if we should emit tied or untied task.
5320   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5321   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5322     CGF.EmitStmt(CS->getCapturedStmt());
5323   };
5324   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5325                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5326                             const OMPTaskDataTy &Data) {
5327     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5328                                             SharedsTy, CapturedStruct, IfCond,
5329                                             Data);
5330   };
5331   auto LPCRegion =
5332       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5333   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5334 }
5335 
EmitOMPTaskyieldDirective(const OMPTaskyieldDirective & S)5336 void CodeGenFunction::EmitOMPTaskyieldDirective(
5337     const OMPTaskyieldDirective &S) {
5338   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5339 }
5340 
EmitOMPErrorDirective(const OMPErrorDirective & S)5341 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5342   const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5343   Expr *ME = MC ? MC->getMessageString() : nullptr;
5344   const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5345   bool IsFatal = false;
5346   if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5347     IsFatal = true;
5348   CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5349 }
5350 
EmitOMPBarrierDirective(const OMPBarrierDirective & S)5351 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5352   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5353 }
5354 
EmitOMPTaskwaitDirective(const OMPTaskwaitDirective & S)5355 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5356   OMPTaskDataTy Data;
5357   // Build list of dependences
5358   buildDependences(S, Data);
5359   Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5360   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5361 }
5362 
isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective & T)5363 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5364   return T.clauses().empty();
5365 }
5366 
/// Emit code for '#pragma omp taskgroup'.
///
/// When the OpenMPIRBuilder is enabled and the directive has no clauses, the
/// builder emits the taskgroup and this function returns early. Otherwise the
/// region is emitted through the runtime, initializing a task-reduction
/// descriptor first if the directive carries 'task_reduction' clauses.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  // Fast path: let the OpenMPIRBuilder produce the taskgroup when it is
  // enabled and the directive is simple enough (no clauses).
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    // The builder calls back into clang to emit the taskgroup body at the
    // insertion point it provides.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(CodeGenIP);
      EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };
    // Ensure a CapturedStmtInfo exists while the body is emitted.
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
    return;
  }
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // If the taskgroup carries 'task_reduction' clauses, initialize the
    // task-reduction descriptor and store it into the reduction-ref variable
    // so nested tasks can find it.
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
5414 
EmitOMPFlushDirective(const OMPFlushDirective & S)5415 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5416   llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5417                                 ? llvm::AtomicOrdering::NotAtomic
5418                                 : llvm::AtomicOrdering::AcquireRelease;
5419   CGM.getOpenMPRuntime().emitFlush(
5420       *this,
5421       [&S]() -> ArrayRef<const Expr *> {
5422         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5423           return llvm::ArrayRef(FlushClause->varlist_begin(),
5424                                 FlushClause->varlist_end());
5425         return std::nullopt;
5426       }(),
5427       S.getBeginLoc(), AO);
5428 }
5429 
EmitOMPDepobjDirective(const OMPDepobjDirective & S)5430 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5431   const auto *DO = S.getSingleClause<OMPDepobjClause>();
5432   LValue DOLVal = EmitLValue(DO->getDepobj());
5433   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5434     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5435                                            DC->getModifier());
5436     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5437     Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5438         *this, Dependencies, DC->getBeginLoc());
5439     EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
5440     return;
5441   }
5442   if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5443     CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5444     return;
5445   }
5446   if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5447     CGM.getOpenMPRuntime().emitUpdateClause(
5448         *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5449     return;
5450   }
5451 }
5452 
/// Emit code for the '#pragma omp scan' separating directive.
///
/// A scan is only meaningful inside a loop directive that carries an
/// 'inscan' reduction; if no parent loop directive was recorded, nothing is
/// emitted. For a 'simd' parent (or simd-based directives in simd-only mode)
/// the scan is lowered inline by reordering the two loop-body parts — see
/// the transformation sketch in the comment block below. Otherwise the scan
/// copies between the reduction variables and the temporary copy arrays
/// created by the parent directive, and wires up the
/// OMPBeforeScanBlock/OMPAfterScanBlock/OMPScanDispatch/OMPScanExitBlock
/// basic blocks of the two-pass emission.
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  // 'inclusive' vs 'exclusive' clause selects the scan flavor.
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  // Gather the reduction items of every 'inscan' reduction on the parent
  // directive, together with their copy helper expressions.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    //  }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(),
                      SrcLVal.getAddress(),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      // LHS = LHS <op> RHS (simple, nowait reduction step).
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      // RHS = LHS for inclusive scan; RHS = TMP for exclusive scan.
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(
            PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
            cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  // Non-simd lowering: the parent loop is emitted twice (input phase and
  // scan phase); OMPFirstScanLoop tells which pass is being emitted.
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Bind the opaque index of the buffer subscript to the current
      // iteration value before evaluating buffer[i].
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(
          PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
          cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
          cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      // Exclusive scan skips the copy for iteration 0 entirely.
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(
          PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
          cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
          cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}
5652 
EmitOMPDistributeLoop(const OMPLoopDirective & S,const CodeGenLoopTy & CodeGenLoop,Expr * IncExpr)5653 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5654                                             const CodeGenLoopTy &CodeGenLoop,
5655                                             Expr *IncExpr) {
5656   // Emit the loop iteration variable.
5657   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5658   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5659   EmitVarDecl(*IVDecl);
5660 
5661   // Emit the iterations count variable.
5662   // If it is not a variable, Sema decided to calculate iterations count on each
5663   // iteration (e.g., it is foldable into a constant).
5664   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5665     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5666     // Emit calculation of the iterations count.
5667     EmitIgnoredExpr(S.getCalcLastIteration());
5668   }
5669 
5670   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5671 
5672   bool HasLastprivateClause = false;
5673   // Check pre-condition.
5674   {
5675     OMPLoopScope PreInitScope(*this, S);
5676     // Skip the entire loop if we don't meet the precondition.
5677     // If the condition constant folds and can be elided, avoid emitting the
5678     // whole loop.
5679     bool CondConstant;
5680     llvm::BasicBlock *ContBlock = nullptr;
5681     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5682       if (!CondConstant)
5683         return;
5684     } else {
5685       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5686       ContBlock = createBasicBlock("omp.precond.end");
5687       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5688                   getProfileCount(&S));
5689       EmitBlock(ThenBlock);
5690       incrementProfileCounter(&S);
5691     }
5692 
5693     emitAlignedClause(*this, S);
5694     // Emit 'then' code.
5695     {
5696       // Emit helper vars inits.
5697 
5698       LValue LB = EmitOMPHelperVar(
5699           *this, cast<DeclRefExpr>(
5700                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5701                           ? S.getCombinedLowerBoundVariable()
5702                           : S.getLowerBoundVariable())));
5703       LValue UB = EmitOMPHelperVar(
5704           *this, cast<DeclRefExpr>(
5705                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5706                           ? S.getCombinedUpperBoundVariable()
5707                           : S.getUpperBoundVariable())));
5708       LValue ST =
5709           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5710       LValue IL =
5711           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5712 
5713       OMPPrivateScope LoopScope(*this);
5714       if (EmitOMPFirstprivateClause(S, LoopScope)) {
5715         // Emit implicit barrier to synchronize threads and avoid data races
5716         // on initialization of firstprivate variables and post-update of
5717         // lastprivate variables.
5718         CGM.getOpenMPRuntime().emitBarrierCall(
5719             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5720             /*ForceSimpleCall=*/true);
5721       }
5722       EmitOMPPrivateClause(S, LoopScope);
5723       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5724           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5725           !isOpenMPTeamsDirective(S.getDirectiveKind()))
5726         EmitOMPReductionClauseInit(S, LoopScope);
5727       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5728       EmitOMPPrivateLoopCounters(S, LoopScope);
5729       (void)LoopScope.Privatize();
5730       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5731         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5732 
5733       // Detect the distribute schedule kind and chunk.
5734       llvm::Value *Chunk = nullptr;
5735       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5736       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5737         ScheduleKind = C->getDistScheduleKind();
5738         if (const Expr *Ch = C->getChunkSize()) {
5739           Chunk = EmitScalarExpr(Ch);
5740           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5741                                        S.getIterationVariable()->getType(),
5742                                        S.getBeginLoc());
5743         }
5744       } else {
5745         // Default behaviour for dist_schedule clause.
5746         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5747             *this, S, ScheduleKind, Chunk);
5748       }
5749       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5750       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5751 
5752       // OpenMP [2.10.8, distribute Construct, Description]
5753       // If dist_schedule is specified, kind must be static. If specified,
5754       // iterations are divided into chunks of size chunk_size, chunks are
5755       // assigned to the teams of the league in a round-robin fashion in the
5756       // order of the team number. When no chunk_size is specified, the
5757       // iteration space is divided into chunks that are approximately equal
5758       // in size, and at most one chunk is distributed to each team of the
5759       // league. The size of the chunks is unspecified in this case.
5760       bool StaticChunked =
5761           RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5762           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5763       if (RT.isStaticNonchunked(ScheduleKind,
5764                                 /* Chunked */ Chunk != nullptr) ||
5765           StaticChunked) {
5766         CGOpenMPRuntime::StaticRTInput StaticInit(
5767             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
5768             LB.getAddress(), UB.getAddress(), ST.getAddress(),
5769             StaticChunked ? Chunk : nullptr);
5770         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5771                                     StaticInit);
5772         JumpDest LoopExit =
5773             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5774         // UB = min(UB, GlobalUB);
5775         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5776                             ? S.getCombinedEnsureUpperBound()
5777                             : S.getEnsureUpperBound());
5778         // IV = LB;
5779         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5780                             ? S.getCombinedInit()
5781                             : S.getInit());
5782 
5783         const Expr *Cond =
5784             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5785                 ? S.getCombinedCond()
5786                 : S.getCond();
5787 
5788         if (StaticChunked)
5789           Cond = S.getCombinedDistCond();
5790 
5791         // For static unchunked schedules generate:
5792         //
5793         //  1. For distribute alone, codegen
5794         //    while (idx <= UB) {
5795         //      BODY;
5796         //      ++idx;
5797         //    }
5798         //
5799         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
5800         //    while (idx <= UB) {
5801         //      <CodeGen rest of pragma>(LB, UB);
5802         //      idx += ST;
5803         //    }
5804         //
5805         // For static chunk one schedule generate:
5806         //
5807         // while (IV <= GlobalUB) {
5808         //   <CodeGen rest of pragma>(LB, UB);
5809         //   LB += ST;
5810         //   UB += ST;
5811         //   UB = min(UB, GlobalUB);
5812         //   IV = LB;
5813         // }
5814         //
5815         emitCommonSimdLoop(
5816             *this, S,
5817             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5818               if (isOpenMPSimdDirective(S.getDirectiveKind()))
5819                 CGF.EmitOMPSimdInit(S);
5820             },
5821             [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5822              StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5823               CGF.EmitOMPInnerLoop(
5824                   S, LoopScope.requiresCleanups(), Cond, IncExpr,
5825                   [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5826                     CodeGenLoop(CGF, S, LoopExit);
5827                   },
5828                   [&S, StaticChunked](CodeGenFunction &CGF) {
5829                     if (StaticChunked) {
5830                       CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5831                       CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5832                       CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5833                       CGF.EmitIgnoredExpr(S.getCombinedInit());
5834                     }
5835                   });
5836             });
5837         EmitBlock(LoopExit.getBlock());
5838         // Tell the runtime we are done.
5839         RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
5840       } else {
5841         // Emit the outer loop, which requests its work chunk [LB..UB] from
5842         // runtime and runs the inner loop to process it.
5843         const OMPLoopArguments LoopArguments = {
5844             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
5845             Chunk};
5846         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5847                                    CodeGenLoop);
5848       }
5849       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5850         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5851           return CGF.Builder.CreateIsNotNull(
5852               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5853         });
5854       }
5855       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5856           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5857           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5858         EmitOMPReductionClauseFinal(S, OMPD_simd);
5859         // Emit post-update of the reduction variables if IsLastIter != 0.
5860         emitPostUpdateForReductionClause(
5861             *this, S, [IL, &S](CodeGenFunction &CGF) {
5862               return CGF.Builder.CreateIsNotNull(
5863                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5864             });
5865       }
5866       // Emit final copy of the lastprivate variables if IsLastIter != 0.
5867       if (HasLastprivateClause) {
5868         EmitOMPLastprivateClauseFinal(
5869             S, /*NoFinals=*/false,
5870             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5871       }
5872     }
5873 
5874     // We're now done with the loop, so jump to the continuation block.
5875     if (ContBlock) {
5876       EmitBranch(ContBlock);
5877       EmitBlock(ContBlock, true);
5878     }
5879   }
5880 }
5881 
EmitOMPDistributeDirective(const OMPDistributeDirective & S)5882 void CodeGenFunction::EmitOMPDistributeDirective(
5883     const OMPDistributeDirective &S) {
5884   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5885     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5886   };
5887   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5888   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5889 }
5890 
emitOutlinedOrderedFunction(CodeGenModule & CGM,const CapturedStmt * S,SourceLocation Loc)5891 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5892                                                    const CapturedStmt *S,
5893                                                    SourceLocation Loc) {
5894   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5895   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5896   CGF.CapturedStmtInfo = &CapStmtInfo;
5897   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5898   Fn->setDoesNotRecurse();
5899   return Fn;
5900 }
5901 
5902 template <typename T>
emitRestoreIP(CodeGenFunction & CGF,const T * C,llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,llvm::OpenMPIRBuilder & OMPBuilder)5903 static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5904                           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5905                           llvm::OpenMPIRBuilder &OMPBuilder) {
5906 
5907   unsigned NumLoops = C->getNumLoops();
5908   QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5909       /*DestWidth=*/64, /*Signed=*/1);
5910   llvm::SmallVector<llvm::Value *> StoreValues;
5911   for (unsigned I = 0; I < NumLoops; I++) {
5912     const Expr *CounterVal = C->getLoopData(I);
5913     assert(CounterVal);
5914     llvm::Value *StoreValue = CGF.EmitScalarConversion(
5915         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5916         CounterVal->getExprLoc());
5917     StoreValues.emplace_back(StoreValue);
5918   }
5919   OMPDoacrossKind<T> ODK;
5920   bool IsDependSource = ODK.isSource(C);
5921   CGF.Builder.restoreIP(
5922       OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
5923                                      StoreValues, ".cnt.addr", IsDependSource));
5924 }
5925 
/// Emit code for the '#pragma omp ordered' directive.
/// Three forms are handled: (1) ordered with depend/doacross clauses, which
/// has no associated statement and lowers to doacross runtime calls;
/// (2) ordered with a 'simd' clause, which outlines the associated region
/// and calls it; (3) ordered with 'threads' or no clause, which emits the
/// region inside the runtime's ordered begin/end calls. Each form exists in
/// two flavors: the OpenMPIRBuilder path and the classic CGOpenMPRuntime
/// path, selected by -fopenmp-enable-irbuilder.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    // Lower through the shared OpenMPIRBuilder.
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      // Emit one doacross post/wait per depend/doacross clause.
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          // 'ordered simd': outline the captured region and call it.
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          // 'ordered threads' (or no clause): emit the region inline.
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      // '!C' == IsThreads: true unless a simd clause was present.
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  // Classic CGOpenMPRuntime path.
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      // 'ordered simd': outline the captured region and emit a call to it.
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
6014 
convertToScalarValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)6015 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6016                                          QualType SrcType, QualType DestType,
6017                                          SourceLocation Loc) {
6018   assert(CGF.hasScalarEvaluationKind(DestType) &&
6019          "DestType must have scalar evaluation kind.");
6020   assert(!Val.isAggregate() && "Must be a scalar or complex.");
6021   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
6022                                                    DestType, Loc)
6023                         : CGF.EmitComplexToScalarConversion(
6024                               Val.getComplexVal(), SrcType, DestType, Loc);
6025 }
6026 
6027 static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)6028 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6029                       QualType DestType, SourceLocation Loc) {
6030   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6031          "DestType must have complex evaluation kind.");
6032   CodeGenFunction::ComplexPairTy ComplexVal;
6033   if (Val.isScalar()) {
6034     // Convert the input element to the element type of the complex.
6035     QualType DestElementType =
6036         DestType->castAs<ComplexType>()->getElementType();
6037     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6038         Val.getScalarVal(), SrcType, DestElementType, Loc);
6039     ComplexVal = CodeGenFunction::ComplexPairTy(
6040         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
6041   } else {
6042     assert(Val.isComplex() && "Must be a scalar or complex.");
6043     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6044     QualType DestElementType =
6045         DestType->castAs<ComplexType>()->getElementType();
6046     ComplexVal.first = CGF.EmitScalarConversion(
6047         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
6048     ComplexVal.second = CGF.EmitScalarConversion(
6049         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
6050   }
6051   return ComplexVal;
6052 }
6053 
emitSimpleAtomicStore(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,RValue RVal)6054 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6055                                   LValue LVal, RValue RVal) {
6056   if (LVal.isGlobalReg())
6057     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
6058   else
6059     CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
6060 }
6061 
emitSimpleAtomicLoad(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,SourceLocation Loc)6062 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6063                                    llvm::AtomicOrdering AO, LValue LVal,
6064                                    SourceLocation Loc) {
6065   if (LVal.isGlobalReg())
6066     return CGF.EmitLoadOfLValue(LVal, Loc);
6067   return CGF.EmitAtomicLoad(
6068       LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
6069       LVal.isVolatile());
6070 }
6071 
emitOMPSimpleStore(LValue LVal,RValue RVal,QualType RValTy,SourceLocation Loc)6072 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6073                                          QualType RValTy, SourceLocation Loc) {
6074   switch (getEvaluationKind(LVal.getType())) {
6075   case TEK_Scalar:
6076     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
6077                                *this, RVal, RValTy, LVal.getType(), Loc)),
6078                            LVal);
6079     break;
6080   case TEK_Complex:
6081     EmitStoreOfComplex(
6082         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
6083         /*isInit=*/false);
6084     break;
6085   case TEK_Aggregate:
6086     llvm_unreachable("Must be a scalar or complex.");
6087   }
6088 }
6089 
emitOMPAtomicReadExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * V,SourceLocation Loc)6090 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6091                                   const Expr *X, const Expr *V,
6092                                   SourceLocation Loc) {
6093   // v = x;
6094   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6095   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6096   LValue XLValue = CGF.EmitLValue(X);
6097   LValue VLValue = CGF.EmitLValue(V);
6098   RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
6099   // OpenMP, 2.17.7, atomic Construct
6100   // If the read or capture clause is specified and the acquire, acq_rel, or
6101   // seq_cst clause is specified then the strong flush on exit from the atomic
6102   // operation is also an acquire flush.
6103   switch (AO) {
6104   case llvm::AtomicOrdering::Acquire:
6105   case llvm::AtomicOrdering::AcquireRelease:
6106   case llvm::AtomicOrdering::SequentiallyConsistent:
6107     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6108                                          llvm::AtomicOrdering::Acquire);
6109     break;
6110   case llvm::AtomicOrdering::Monotonic:
6111   case llvm::AtomicOrdering::Release:
6112     break;
6113   case llvm::AtomicOrdering::NotAtomic:
6114   case llvm::AtomicOrdering::Unordered:
6115     llvm_unreachable("Unexpected ordering.");
6116   }
6117   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6118   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6119 }
6120 
emitOMPAtomicWriteExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * E,SourceLocation Loc)6121 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6122                                    llvm::AtomicOrdering AO, const Expr *X,
6123                                    const Expr *E, SourceLocation Loc) {
6124   // x = expr;
6125   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6126   emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6127   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6128   // OpenMP, 2.17.7, atomic Construct
6129   // If the write, update, or capture clause is specified and the release,
6130   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6131   // the atomic operation is also a release flush.
6132   switch (AO) {
6133   case llvm::AtomicOrdering::Release:
6134   case llvm::AtomicOrdering::AcquireRelease:
6135   case llvm::AtomicOrdering::SequentiallyConsistent:
6136     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6137                                          llvm::AtomicOrdering::Release);
6138     break;
6139   case llvm::AtomicOrdering::Acquire:
6140   case llvm::AtomicOrdering::Monotonic:
6141     break;
6142   case llvm::AtomicOrdering::NotAtomic:
6143   case llvm::AtomicOrdering::Unordered:
6144     llvm_unreachable("Unexpected ordering.");
6145   }
6146 }
6147 
/// Try to lower an atomic update of 'x' to a single 'atomicrmw' instruction.
///
/// \param X Lvalue of the updated variable 'x'.
/// \param Update Rvalue of the other operand of the update.
/// \param BO Binary operation of the update ('x = x BO expr' form).
/// \param AO Required atomic ordering.
/// \param IsXLHSInRHSPart True if 'x' is the LHS operand inside the update
/// expression ('x binop expr'), false for 'expr binop x'.
/// \returns {true, old value of 'x'} when an atomicrmw was emitted;
/// {false, nullptr} when the operation cannot be expressed as a single
/// atomicrmw and the caller must fall back (e.g. to a compare-and-swap loop).
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Integer types are always eligible; floating-point types only for add/sub
  // and only when the store size is a power of two.
  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress().getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the source-level binary operator to the corresponding atomicrmw
  // operation, choosing integer vs floating-point variants as needed.
  bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    // 'x = expr - x' is not expressible as an atomicrmw sub; bail out.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // Min/max updates expressed with '<': which of min/max applies depends on
    // whether 'x' is the LHS of the comparison, and the (un)signed/FP variant
    // on the representation of 'x'.
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    // Mirror of the BO_LT case with min/max swapped.
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    // Plain assignment maps to an atomic exchange.
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // No atomicrmw counterpart; caller falls back to a CAS-based update.
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    // These opcodes never reach here for an atomic update expression.
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // A constant operand may have a different width/type than 'x'; cast it to
    // the element type of 'x' (honoring signedness for integers).
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress().getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                         X.getAddress().getElementType());
  }
  // atomicrmw yields the old value of 'x'.
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
6265 
EmitOMPAtomicSimpleUpdateExpr(LValue X,RValue E,BinaryOperatorKind BO,bool IsXLHSInRHSPart,llvm::AtomicOrdering AO,SourceLocation Loc,const llvm::function_ref<RValue (RValue)> CommonGen)6266 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6267     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6268     llvm::AtomicOrdering AO, SourceLocation Loc,
6269     const llvm::function_ref<RValue(RValue)> CommonGen) {
6270   // Update expressions are allowed to have the following forms:
6271   // x binop= expr; -> xrval + expr;
6272   // x++, ++x -> xrval + 1;
6273   // x--, --x -> xrval - 1;
6274   // x = x binop expr; -> xrval binop expr
6275   // x = expr Op x; - > expr binop xrval;
6276   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6277   if (!Res.first) {
6278     if (X.isGlobalReg()) {
6279       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6280       // 'xrval'.
6281       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6282     } else {
6283       // Perform compare-and-swap procedure.
6284       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6285     }
6286   }
6287   return Res;
6288 }
6289 
emitOMPAtomicUpdateExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)6290 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6291                                     llvm::AtomicOrdering AO, const Expr *X,
6292                                     const Expr *E, const Expr *UE,
6293                                     bool IsXLHSInRHSPart, SourceLocation Loc) {
6294   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6295          "Update expr in 'atomic update' must be a binary operator.");
6296   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6297   // Update expressions are allowed to have the following forms:
6298   // x binop= expr; -> xrval + expr;
6299   // x++, ++x -> xrval + 1;
6300   // x--, --x -> xrval - 1;
6301   // x = x binop expr; -> xrval binop expr
6302   // x = expr Op x; - > expr binop xrval;
6303   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6304   LValue XLValue = CGF.EmitLValue(X);
6305   RValue ExprRValue = CGF.EmitAnyExpr(E);
6306   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6307   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6308   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6309   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6310   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6311     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6312     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6313     return CGF.EmitAnyExpr(UE);
6314   };
6315   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6316       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6317   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6318   // OpenMP, 2.17.7, atomic Construct
6319   // If the write, update, or capture clause is specified and the release,
6320   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6321   // the atomic operation is also a release flush.
6322   switch (AO) {
6323   case llvm::AtomicOrdering::Release:
6324   case llvm::AtomicOrdering::AcquireRelease:
6325   case llvm::AtomicOrdering::SequentiallyConsistent:
6326     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6327                                          llvm::AtomicOrdering::Release);
6328     break;
6329   case llvm::AtomicOrdering::Acquire:
6330   case llvm::AtomicOrdering::Monotonic:
6331     break;
6332   case llvm::AtomicOrdering::NotAtomic:
6333   case llvm::AtomicOrdering::Unordered:
6334     llvm_unreachable("Unexpected ordering.");
6335   }
6336 }
6337 
convertToType(CodeGenFunction & CGF,RValue Value,QualType SourceType,QualType ResType,SourceLocation Loc)6338 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6339                             QualType SourceType, QualType ResType,
6340                             SourceLocation Loc) {
6341   switch (CGF.getEvaluationKind(ResType)) {
6342   case TEK_Scalar:
6343     return RValue::get(
6344         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6345   case TEK_Complex: {
6346     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6347     return RValue::getComplex(Res.first, Res.second);
6348   }
6349   case TEK_Aggregate:
6350     break;
6351   }
6352   llvm_unreachable("Must be a scalar or complex.");
6353 }
6354 
emitOMPAtomicCaptureExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,bool IsPostfixUpdate,const Expr * V,const Expr * X,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)6355 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6356                                      llvm::AtomicOrdering AO,
6357                                      bool IsPostfixUpdate, const Expr *V,
6358                                      const Expr *X, const Expr *E,
6359                                      const Expr *UE, bool IsXLHSInRHSPart,
6360                                      SourceLocation Loc) {
6361   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6362   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6363   RValue NewVVal;
6364   LValue VLValue = CGF.EmitLValue(V);
6365   LValue XLValue = CGF.EmitLValue(X);
6366   RValue ExprRValue = CGF.EmitAnyExpr(E);
6367   QualType NewVValType;
6368   if (UE) {
6369     // 'x' is updated with some additional value.
6370     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6371            "Update expr in 'atomic capture' must be a binary operator.");
6372     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6373     // Update expressions are allowed to have the following forms:
6374     // x binop= expr; -> xrval + expr;
6375     // x++, ++x -> xrval + 1;
6376     // x--, --x -> xrval - 1;
6377     // x = x binop expr; -> xrval binop expr
6378     // x = expr Op x; - > expr binop xrval;
6379     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6380     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6381     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6382     NewVValType = XRValExpr->getType();
6383     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6384     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6385                   IsPostfixUpdate](RValue XRValue) {
6386       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6387       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6388       RValue Res = CGF.EmitAnyExpr(UE);
6389       NewVVal = IsPostfixUpdate ? XRValue : Res;
6390       return Res;
6391     };
6392     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6393         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6394     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6395     if (Res.first) {
6396       // 'atomicrmw' instruction was generated.
6397       if (IsPostfixUpdate) {
6398         // Use old value from 'atomicrmw'.
6399         NewVVal = Res.second;
6400       } else {
6401         // 'atomicrmw' does not provide new value, so evaluate it using old
6402         // value of 'x'.
6403         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6404         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6405         NewVVal = CGF.EmitAnyExpr(UE);
6406       }
6407     }
6408   } else {
6409     // 'x' is simply rewritten with some 'expr'.
6410     NewVValType = X->getType().getNonReferenceType();
6411     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6412                                X->getType().getNonReferenceType(), Loc);
6413     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6414       NewVVal = XRValue;
6415       return ExprRValue;
6416     };
6417     // Try to perform atomicrmw xchg, otherwise simple exchange.
6418     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6419         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6420         Loc, Gen);
6421     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6422     if (Res.first) {
6423       // 'atomicrmw' instruction was generated.
6424       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6425     }
6426   }
6427   // Emit post-update store to 'v' of old/new 'x' value.
6428   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6429   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6430   // OpenMP 5.1 removes the required flush for capture clause.
6431   if (CGF.CGM.getLangOpts().OpenMP < 51) {
6432     // OpenMP, 2.17.7, atomic Construct
6433     // If the write, update, or capture clause is specified and the release,
6434     // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6435     // the atomic operation is also a release flush.
6436     // If the read or capture clause is specified and the acquire, acq_rel, or
6437     // seq_cst clause is specified then the strong flush on exit from the atomic
6438     // operation is also an acquire flush.
6439     switch (AO) {
6440     case llvm::AtomicOrdering::Release:
6441       CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6442                                            llvm::AtomicOrdering::Release);
6443       break;
6444     case llvm::AtomicOrdering::Acquire:
6445       CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6446                                            llvm::AtomicOrdering::Acquire);
6447       break;
6448     case llvm::AtomicOrdering::AcquireRelease:
6449     case llvm::AtomicOrdering::SequentiallyConsistent:
6450       CGF.CGM.getOpenMPRuntime().emitFlush(
6451           CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
6452       break;
6453     case llvm::AtomicOrdering::Monotonic:
6454       break;
6455     case llvm::AtomicOrdering::NotAtomic:
6456     case llvm::AtomicOrdering::Unordered:
6457       llvm_unreachable("Unexpected ordering.");
6458     }
6459   }
6460 }
6461 
emitOMPAtomicCompareExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,llvm::AtomicOrdering FailAO,const Expr * X,const Expr * V,const Expr * R,const Expr * E,const Expr * D,const Expr * CE,bool IsXBinopExpr,bool IsPostfixUpdate,bool IsFailOnly,SourceLocation Loc)6462 static void emitOMPAtomicCompareExpr(
6463     CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
6464     const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
6465     const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
6466     SourceLocation Loc) {
6467   llvm::OpenMPIRBuilder &OMPBuilder =
6468       CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6469 
6470   OMPAtomicCompareOp Op;
6471   assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6472   switch (cast<BinaryOperator>(CE)->getOpcode()) {
6473   case BO_EQ:
6474     Op = OMPAtomicCompareOp::EQ;
6475     break;
6476   case BO_LT:
6477     Op = OMPAtomicCompareOp::MIN;
6478     break;
6479   case BO_GT:
6480     Op = OMPAtomicCompareOp::MAX;
6481     break;
6482   default:
6483     llvm_unreachable("unsupported atomic compare binary operator");
6484   }
6485 
6486   LValue XLVal = CGF.EmitLValue(X);
6487   Address XAddr = XLVal.getAddress();
6488 
6489   auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6490     if (X->getType() == E->getType())
6491       return CGF.EmitScalarExpr(E);
6492     const Expr *NewE = E->IgnoreImplicitAsWritten();
6493     llvm::Value *V = CGF.EmitScalarExpr(NewE);
6494     if (NewE->getType() == X->getType())
6495       return V;
6496     return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
6497   };
6498 
6499   llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6500   llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6501   if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
6502     EVal = CGF.Builder.CreateIntCast(
6503         CI, XLVal.getAddress().getElementType(),
6504         E->getType()->hasSignedIntegerRepresentation());
6505   if (DVal)
6506     if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
6507       DVal = CGF.Builder.CreateIntCast(
6508           CI, XLVal.getAddress().getElementType(),
6509           D->getType()->hasSignedIntegerRepresentation());
6510 
6511   llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6512       XAddr.emitRawPointer(CGF), XAddr.getElementType(),
6513       X->getType()->hasSignedIntegerRepresentation(),
6514       X->getType().isVolatileQualified()};
6515   llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6516   if (V) {
6517     LValue LV = CGF.EmitLValue(V);
6518     Address Addr = LV.getAddress();
6519     VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
6520               V->getType()->hasSignedIntegerRepresentation(),
6521               V->getType().isVolatileQualified()};
6522   }
6523   if (R) {
6524     LValue LV = CGF.EmitLValue(R);
6525     Address Addr = LV.getAddress();
6526     ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
6527               R->getType()->hasSignedIntegerRepresentation(),
6528               R->getType().isVolatileQualified()};
6529   }
6530 
6531   if (FailAO == llvm::AtomicOrdering::NotAtomic) {
6532     // fail clause was not mentioned on the
6533     // "#pragma omp atomic compare" construct.
6534     CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6535         CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6536         IsPostfixUpdate, IsFailOnly));
6537   } else
6538     CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6539         CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6540         IsPostfixUpdate, IsFailOnly, FailAO));
6541 }
6542 
emitOMPAtomicExpr(CodeGenFunction & CGF,OpenMPClauseKind Kind,llvm::AtomicOrdering AO,llvm::AtomicOrdering FailAO,bool IsPostfixUpdate,const Expr * X,const Expr * V,const Expr * R,const Expr * E,const Expr * UE,const Expr * D,const Expr * CE,bool IsXLHSInRHSPart,bool IsFailOnly,SourceLocation Loc)6543 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6544                               llvm::AtomicOrdering AO,
6545                               llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6546                               const Expr *X, const Expr *V, const Expr *R,
6547                               const Expr *E, const Expr *UE, const Expr *D,
6548                               const Expr *CE, bool IsXLHSInRHSPart,
6549                               bool IsFailOnly, SourceLocation Loc) {
6550   switch (Kind) {
6551   case OMPC_read:
6552     emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6553     break;
6554   case OMPC_write:
6555     emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6556     break;
6557   case OMPC_unknown:
6558   case OMPC_update:
6559     emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6560     break;
6561   case OMPC_capture:
6562     emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6563                              IsXLHSInRHSPart, Loc);
6564     break;
6565   case OMPC_compare: {
6566     emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6567                              IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6568     break;
6569   }
6570   default:
6571     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6572   }
6573 }
6574 
/// Emit an '#pragma omp atomic' directive: determine the effective clause
/// kind and the success/failure memory orderings from the clause list, then
/// dispatch to emitOMPAtomicExpr.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  // Start from the implementation-default ordering; an explicit memory-order
  // clause below overrides it.
  llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
  // Fail Memory Clause Ordering.
  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    // TBD
    // NOTE(review): a 'weak' clause currently abandons emission of the whole
    // directive — presumably a placeholder until 'weak' is supported; confirm.
    if (K == OMPC_weak)
      return;
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    // No explicit ordering clause: derive the ordering from the default,
    // weakening acq_rel to release (update/write) or acquire (read) for
    // one-sided operations.
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  // 'compare' combined with 'fail' carries a separate ordering for the
  // failure path of the compare-exchange.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_fail)) {
    Kind = OMPC_compare;
    const auto *FailClause = S.getSingleClause<OMPFailClause>();
    if (FailClause) {
      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
      if (FailParameter == llvm::omp::OMPC_relaxed)
        FailAO = llvm::AtomicOrdering::Monotonic;
      else if (FailParameter == llvm::omp::OMPC_acquire)
        FailAO = llvm::AtomicOrdering::Acquire;
      else if (FailParameter == llvm::omp::OMPC_seq_cst)
        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
                    S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
                    S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
                    S.isFailOnly(), S.getBeginLoc());
}
6657 
/// Common host-side emission for all 'target' execution directives: outline
/// the region, decide whether it is an offload entry, and emit the target
/// call guarded by the 'if'/'device' clauses.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  // Under -fopenmp-offload-mandatory a non-offloadable region is an error.
  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
    CGM.getDiags().Report(DiagID);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  // Computes the trip count of the associated loop for offload entries so the
  // runtime can size the kernel launch; returns null otherwise.
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      // NOTE(review): this constructs and immediately destroys a temporary
      // OMPLoopScope; if the scope is meant to cover the EmitScalarExpr call
      // below it would need to be a named local — confirm intent.
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}
6745 
emitTargetRegion(CodeGenFunction & CGF,const OMPTargetDirective & S,PrePostActionTy & Action)6746 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6747                              PrePostActionTy &Action) {
6748   Action.Enter(CGF);
6749   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6750   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6751   CGF.EmitOMPPrivateClause(S, PrivateScope);
6752   (void)PrivateScope.Privatize();
6753   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6754     CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6755 
6756   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6757   CGF.EnsureInsertPoint();
6758 }
6759 
EmitOMPTargetDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetDirective & S)6760 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6761                                                   StringRef ParentName,
6762                                                   const OMPTargetDirective &S) {
6763   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6764     emitTargetRegion(CGF, S, Action);
6765   };
6766   llvm::Function *Fn;
6767   llvm::Constant *Addr;
6768   // Emit target region as a standalone region.
6769   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6770       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6771   assert(Fn && Addr && "Target device function emission failed.");
6772 }
6773 
EmitOMPTargetDirective(const OMPTargetDirective & S)6774 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6775   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6776     emitTargetRegion(CGF, S, Action);
6777   };
6778   emitCommonOMPTargetDirective(*this, S, CodeGen);
6779 }
6780 
emitCommonOMPTeamsDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)6781 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6782                                         const OMPExecutableDirective &S,
6783                                         OpenMPDirectiveKind InnermostKind,
6784                                         const RegionCodeGenTy &CodeGen) {
6785   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6786   llvm::Function *OutlinedFn =
6787       CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6788           CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
6789           CodeGen);
6790 
6791   const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6792   const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6793   if (NT || TL) {
6794     const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6795     const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6796 
6797     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6798                                                   S.getBeginLoc());
6799   }
6800 
6801   OMPTeamsScope Scope(CGF, S);
6802   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6803   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6804   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6805                                            CapturedVars);
6806 }
6807 
EmitOMPTeamsDirective(const OMPTeamsDirective & S)6808 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6809   // Emit teams region as a standalone region.
6810   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6811     Action.Enter(CGF);
6812     OMPPrivateScope PrivateScope(CGF);
6813     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6814     CGF.EmitOMPPrivateClause(S, PrivateScope);
6815     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6816     (void)PrivateScope.Privatize();
6817     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6818     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6819   };
6820   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6821   emitPostUpdateForReductionClause(*this, S,
6822                                    [](CodeGenFunction &) { return nullptr; });
6823 }
6824 
emitTargetTeamsRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDirective & S)6825 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6826                                   const OMPTargetTeamsDirective &S) {
6827   auto *CS = S.getCapturedStmt(OMPD_teams);
6828   Action.Enter(CGF);
6829   // Emit teams region as a standalone region.
6830   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6831     Action.Enter(CGF);
6832     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6833     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6834     CGF.EmitOMPPrivateClause(S, PrivateScope);
6835     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6836     (void)PrivateScope.Privatize();
6837     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6838       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6839     CGF.EmitStmt(CS->getCapturedStmt());
6840     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6841   };
6842   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6843   emitPostUpdateForReductionClause(CGF, S,
6844                                    [](CodeGenFunction &) { return nullptr; });
6845 }
6846 
EmitOMPTargetTeamsDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDirective & S)6847 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6848     CodeGenModule &CGM, StringRef ParentName,
6849     const OMPTargetTeamsDirective &S) {
6850   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6851     emitTargetTeamsRegion(CGF, Action, S);
6852   };
6853   llvm::Function *Fn;
6854   llvm::Constant *Addr;
6855   // Emit target region as a standalone region.
6856   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6857       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6858   assert(Fn && Addr && "Target device function emission failed.");
6859 }
6860 
EmitOMPTargetTeamsDirective(const OMPTargetTeamsDirective & S)6861 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6862     const OMPTargetTeamsDirective &S) {
6863   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6864     emitTargetTeamsRegion(CGF, Action, S);
6865   };
6866   emitCommonOMPTargetDirective(*this, S, CodeGen);
6867 }
6868 
6869 static void
emitTargetTeamsDistributeRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDistributeDirective & S)6870 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6871                                 const OMPTargetTeamsDistributeDirective &S) {
6872   Action.Enter(CGF);
6873   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6874     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6875   };
6876 
6877   // Emit teams region as a standalone region.
6878   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6879                                             PrePostActionTy &Action) {
6880     Action.Enter(CGF);
6881     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6882     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6883     (void)PrivateScope.Privatize();
6884     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6885                                                     CodeGenDistribute);
6886     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6887   };
6888   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6889   emitPostUpdateForReductionClause(CGF, S,
6890                                    [](CodeGenFunction &) { return nullptr; });
6891 }
6892 
EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeDirective & S)6893 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6894     CodeGenModule &CGM, StringRef ParentName,
6895     const OMPTargetTeamsDistributeDirective &S) {
6896   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6897     emitTargetTeamsDistributeRegion(CGF, Action, S);
6898   };
6899   llvm::Function *Fn;
6900   llvm::Constant *Addr;
6901   // Emit target region as a standalone region.
6902   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6903       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6904   assert(Fn && Addr && "Target device function emission failed.");
6905 }
6906 
EmitOMPTargetTeamsDistributeDirective(const OMPTargetTeamsDistributeDirective & S)6907 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6908     const OMPTargetTeamsDistributeDirective &S) {
6909   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6910     emitTargetTeamsDistributeRegion(CGF, Action, S);
6911   };
6912   emitCommonOMPTargetDirective(*this, S, CodeGen);
6913 }
6914 
emitTargetTeamsDistributeSimdRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDistributeSimdDirective & S)6915 static void emitTargetTeamsDistributeSimdRegion(
6916     CodeGenFunction &CGF, PrePostActionTy &Action,
6917     const OMPTargetTeamsDistributeSimdDirective &S) {
6918   Action.Enter(CGF);
6919   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6920     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6921   };
6922 
6923   // Emit teams region as a standalone region.
6924   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6925                                             PrePostActionTy &Action) {
6926     Action.Enter(CGF);
6927     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6928     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6929     (void)PrivateScope.Privatize();
6930     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6931                                                     CodeGenDistribute);
6932     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6933   };
6934   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6935   emitPostUpdateForReductionClause(CGF, S,
6936                                    [](CodeGenFunction &) { return nullptr; });
6937 }
6938 
EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeSimdDirective & S)6939 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6940     CodeGenModule &CGM, StringRef ParentName,
6941     const OMPTargetTeamsDistributeSimdDirective &S) {
6942   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6943     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6944   };
6945   llvm::Function *Fn;
6946   llvm::Constant *Addr;
6947   // Emit target region as a standalone region.
6948   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6949       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6950   assert(Fn && Addr && "Target device function emission failed.");
6951 }
6952 
EmitOMPTargetTeamsDistributeSimdDirective(const OMPTargetTeamsDistributeSimdDirective & S)6953 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6954     const OMPTargetTeamsDistributeSimdDirective &S) {
6955   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6956     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6957   };
6958   emitCommonOMPTargetDirective(*this, S, CodeGen);
6959 }
6960 
EmitOMPTeamsDistributeDirective(const OMPTeamsDistributeDirective & S)6961 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6962     const OMPTeamsDistributeDirective &S) {
6963 
6964   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6965     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6966   };
6967 
6968   // Emit teams region as a standalone region.
6969   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6970                                             PrePostActionTy &Action) {
6971     Action.Enter(CGF);
6972     OMPPrivateScope PrivateScope(CGF);
6973     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6974     (void)PrivateScope.Privatize();
6975     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6976                                                     CodeGenDistribute);
6977     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6978   };
6979   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6980   emitPostUpdateForReductionClause(*this, S,
6981                                    [](CodeGenFunction &) { return nullptr; });
6982 }
6983 
EmitOMPTeamsDistributeSimdDirective(const OMPTeamsDistributeSimdDirective & S)6984 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6985     const OMPTeamsDistributeSimdDirective &S) {
6986   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6987     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6988   };
6989 
6990   // Emit teams region as a standalone region.
6991   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6992                                             PrePostActionTy &Action) {
6993     Action.Enter(CGF);
6994     OMPPrivateScope PrivateScope(CGF);
6995     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6996     (void)PrivateScope.Privatize();
6997     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6998                                                     CodeGenDistribute);
6999     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7000   };
7001   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
7002   emitPostUpdateForReductionClause(*this, S,
7003                                    [](CodeGenFunction &) { return nullptr; });
7004 }
7005 
EmitOMPTeamsDistributeParallelForDirective(const OMPTeamsDistributeParallelForDirective & S)7006 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
7007     const OMPTeamsDistributeParallelForDirective &S) {
7008   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7009     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7010                               S.getDistInc());
7011   };
7012 
7013   // Emit teams region as a standalone region.
7014   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7015                                             PrePostActionTy &Action) {
7016     Action.Enter(CGF);
7017     OMPPrivateScope PrivateScope(CGF);
7018     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7019     (void)PrivateScope.Privatize();
7020     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7021                                                     CodeGenDistribute);
7022     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7023   };
7024   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
7025   emitPostUpdateForReductionClause(*this, S,
7026                                    [](CodeGenFunction &) { return nullptr; });
7027 }
7028 
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  // Codegen for the inner 'distribute' loop; the combined 'parallel for simd'
  // portion is produced by emitInnerParallelForWhenCombined.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7052 
// Emit the '#pragma omp interop' directive: evaluate the shared device and
// dependence operands once, then lower each init/destroy/use clause through
// the OpenMPIRBuilder.
void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  // The optional 'device' clause applies to all action clauses below.
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build list and emit dependences
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(*this);
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  // 'nowait' is only valid when combined with at least one action clause
  // (init/destroy/use); Sema should have rejected a standalone 'nowait'.
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
  if (!ItOMPInitClause.empty()) {
    // Look at the multiple init clauses
    for (const OMPInitClause *C : ItOMPInitClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      // Map the clause's interop-type to the IRBuilder enum; only 'target'
      // and 'targetsync' are expected here.
      llvm::omp::OMPInteropType InteropType =
          llvm::omp::OMPInteropType::Unknown;
      if (C->getIsTarget()) {
        InteropType = llvm::omp::OMPInteropType::Target;
      } else {
        assert(C->getIsTargetSync() &&
               "Expected interop-type target/targetsync");
        InteropType = llvm::omp::OMPInteropType::TargetSync;
      }
      OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
                                      Device, NumDependences, DependenceList,
                                      Data.HasNowaitClause);
    }
  }
  auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
  if (!ItOMPDestroyClause.empty()) {
    // Look at the multiple destroy clauses
    for (const OMPDestroyClause *C : ItOMPDestroyClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                         NumDependences, DependenceList,
                                         Data.HasNowaitClause);
    }
  }
  auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
  if (!ItOMPUseClause.empty()) {
    // Look at the multiple use clauses
    for (const OMPUseClause *C : ItOMPUseClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                     NumDependences, DependenceList,
                                     Data.HasNowaitClause);
    }
  }
}
7122 
// Emit the body of a 'target teams distribute parallel for' construct:
// a teams region wrapping a distribute loop whose combined 'parallel for'
// part is generated by emitInnerParallelForWhenCombined.
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    // Reduction init/privatize must precede the distribute body; the 'teams'
    // reduction is finalized afterwards.
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7149 
EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForDirective & S)7150 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7151     CodeGenModule &CGM, StringRef ParentName,
7152     const OMPTargetTeamsDistributeParallelForDirective &S) {
7153   // Emit SPMD target teams distribute parallel for region as a standalone
7154   // region.
7155   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7156     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7157   };
7158   llvm::Function *Fn;
7159   llvm::Constant *Addr;
7160   // Emit target region as a standalone region.
7161   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7162       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7163   assert(Fn && Addr && "Target device function emission failed.");
7164 }
7165 
EmitOMPTargetTeamsDistributeParallelForDirective(const OMPTargetTeamsDistributeParallelForDirective & S)7166 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7167     const OMPTargetTeamsDistributeParallelForDirective &S) {
7168   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7169     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7170   };
7171   emitCommonOMPTargetDirective(*this, S, CodeGen);
7172 }
7173 
// Emit the body of a 'target teams distribute parallel for simd' construct:
// a teams region wrapping a distribute loop whose combined 'parallel for
// simd' part is generated by emitInnerParallelForWhenCombined.
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7201 
EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForSimdDirective & S)7202 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7203     CodeGenModule &CGM, StringRef ParentName,
7204     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7205   // Emit SPMD target teams distribute parallel for simd region as a standalone
7206   // region.
7207   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7208     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7209   };
7210   llvm::Function *Fn;
7211   llvm::Constant *Addr;
7212   // Emit target region as a standalone region.
7213   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7214       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7215   assert(Fn && Addr && "Target device function emission failed.");
7216 }
7217 
EmitOMPTargetTeamsDistributeParallelForSimdDirective(const OMPTargetTeamsDistributeParallelForSimdDirective & S)7218 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7219     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7220   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7221     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7222   };
7223   emitCommonOMPTargetDirective(*this, S, CodeGen);
7224 }
7225 
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective & S)7226 void CodeGenFunction::EmitOMPCancellationPointDirective(
7227     const OMPCancellationPointDirective &S) {
7228   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7229                                                    S.getCancelRegion());
7230 }
7231 
// Emit '#pragma omp cancel'. Picks the applicable 'if' clause, then lowers
// either through the OpenMPIRBuilder (for the region kinds it supports) or
// through the classic runtime call.
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  // Find the 'if' clause that governs cancellation: either unmodified or
  // carrying the 'cancel' directive-name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  // Fallback: classic runtime lowering via __kmpc-style cancel call.
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}
7260 
// Return the branch target used when a cancellation fires inside the given
// directive kind: task-like and parallel-like regions exit via the function's
// return block, while worksharing regions use the cancel-stack exit block.
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  // Anything else must be one of the cancellable worksharing kinds.
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
7275 
EmitOMPUseDevicePtrClause(const OMPUseDevicePtrClause & C,OMPPrivateScope & PrivateScope,const llvm::DenseMap<const ValueDecl *,llvm::Value * > CaptureDeviceAddrMap)7276 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7277     const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7278     const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7279         CaptureDeviceAddrMap) {
7280   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7281   for (const Expr *OrigVarIt : C.varlists()) {
7282     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7283     if (!Processed.insert(OrigVD).second)
7284       continue;
7285 
7286     // In order to identify the right initializer we need to match the
7287     // declaration used by the mapping logic. In some cases we may get
7288     // OMPCapturedExprDecl that refers to the original declaration.
7289     const ValueDecl *MatchingVD = OrigVD;
7290     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7291       // OMPCapturedExprDecl are used to privative fields of the current
7292       // structure.
7293       const auto *ME = cast<MemberExpr>(OED->getInit());
7294       assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7295              "Base should be the current struct!");
7296       MatchingVD = ME->getMemberDecl();
7297     }
7298 
7299     // If we don't have information about the current list item, move on to
7300     // the next one.
7301     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7302     if (InitAddrIt == CaptureDeviceAddrMap.end())
7303       continue;
7304 
7305     llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7306 
7307     // Return the address of the private variable.
7308     bool IsRegistered = PrivateScope.addPrivate(
7309         OrigVD,
7310         Address(InitAddrIt->second, Ty,
7311                 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7312     assert(IsRegistered && "firstprivate var already registered as private");
7313     // Silence the warning about unused variable.
7314     (void)IsRegistered;
7315   }
7316 }
7317 
getBaseDecl(const Expr * Ref)7318 static const VarDecl *getBaseDecl(const Expr *Ref) {
7319   const Expr *Base = Ref->IgnoreParenImpCasts();
7320   while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
7321     Base = OASE->getBase()->IgnoreParenImpCasts();
7322   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7323     Base = ASE->getBase()->IgnoreParenImpCasts();
7324   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7325 }
7326 
// Privatize each variable of a 'use_device_addr' clause: look up the device
// address captured by the mapping logic and register it as the variable's
// private address in \p PrivateScope, loading through the pointer when the
// runtime was handed a pointer to the data.
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Each base variable is processed at most once even if listed repeatedly.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable length array need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
                            PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
  }
}
7374 
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the captured body of the 'target data' region.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privative them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
7487 
EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective & S)7488 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7489     const OMPTargetEnterDataDirective &S) {
7490   // If we don't have target devices, don't bother emitting the data mapping
7491   // code.
7492   if (CGM.getLangOpts().OMPTargetTriples.empty())
7493     return;
7494 
7495   // Check if we have any if clause associated with the directive.
7496   const Expr *IfCond = nullptr;
7497   if (const auto *C = S.getSingleClause<OMPIfClause>())
7498     IfCond = C->getCondition();
7499 
7500   // Check if we have any device clause associated with the directive.
7501   const Expr *Device = nullptr;
7502   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7503     Device = C->getDevice();
7504 
7505   OMPLexicalScope Scope(*this, S, OMPD_task);
7506   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7507 }
7508 
EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective & S)7509 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7510     const OMPTargetExitDataDirective &S) {
7511   // If we don't have target devices, don't bother emitting the data mapping
7512   // code.
7513   if (CGM.getLangOpts().OMPTargetTriples.empty())
7514     return;
7515 
7516   // Check if we have any if clause associated with the directive.
7517   const Expr *IfCond = nullptr;
7518   if (const auto *C = S.getSingleClause<OMPIfClause>())
7519     IfCond = C->getCondition();
7520 
7521   // Check if we have any device clause associated with the directive.
7522   const Expr *Device = nullptr;
7523   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7524     Device = C->getDevice();
7525 
7526   OMPLexicalScope Scope(*this, S, OMPD_task);
7527   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7528 }
7529 
// Emit the body of a 'target parallel' construct: privatize
// firstprivate/private/reduction variables, then emit the captured
// 'parallel' statement through the common parallel machinery.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7554 
EmitOMPTargetParallelDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelDirective & S)7555 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7556     CodeGenModule &CGM, StringRef ParentName,
7557     const OMPTargetParallelDirective &S) {
7558   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7559     emitTargetParallelRegion(CGF, S, Action);
7560   };
7561   llvm::Function *Fn;
7562   llvm::Constant *Addr;
7563   // Emit target region as a standalone region.
7564   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7565       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7566   assert(Fn && Addr && "Target device function emission failed.");
7567 }
7568 
EmitOMPTargetParallelDirective(const OMPTargetParallelDirective & S)7569 void CodeGenFunction::EmitOMPTargetParallelDirective(
7570     const OMPTargetParallelDirective &S) {
7571   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7572     emitTargetParallelRegion(CGF, S, Action);
7573   };
7574   emitCommonOMPTargetDirective(*this, S, CodeGen);
7575 }
7576 
// Emit the body of a 'target parallel for' construct as the combination of
// an implicit 'parallel' directive wrapping an implicit 'for' worksharing
// loop, with cancellation tracking for the combined region.
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Track the cancellation state for this combined region so 'cancel'
    // inside the loop branches to the right exit.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
7593 
EmitOMPTargetParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForDirective & S)7594 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7595     CodeGenModule &CGM, StringRef ParentName,
7596     const OMPTargetParallelForDirective &S) {
7597   // Emit SPMD target parallel for region as a standalone region.
7598   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7599     emitTargetParallelForRegion(CGF, S, Action);
7600   };
7601   llvm::Function *Fn;
7602   llvm::Constant *Addr;
7603   // Emit target region as a standalone region.
7604   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7605       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7606   assert(Fn && Addr && "Target device function emission failed.");
7607 }
7608 
EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective & S)7609 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7610     const OMPTargetParallelForDirective &S) {
7611   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7612     emitTargetParallelForRegion(CGF, S, Action);
7613   };
7614   emitCommonOMPTargetDirective(*this, S, CodeGen);
7615 }
7616 
// Emit the body of a 'target parallel for simd' construct as the combination
// of an implicit 'parallel' directive wrapping an implicit worksharing loop.
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
7632 
EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForSimdDirective & S)7633 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7634     CodeGenModule &CGM, StringRef ParentName,
7635     const OMPTargetParallelForSimdDirective &S) {
7636   // Emit SPMD target parallel for region as a standalone region.
7637   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7638     emitTargetParallelForSimdRegion(CGF, S, Action);
7639   };
7640   llvm::Function *Fn;
7641   llvm::Constant *Addr;
7642   // Emit target region as a standalone region.
7643   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7644       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7645   assert(Fn && Addr && "Target device function emission failed.");
7646 }
7647 
EmitOMPTargetParallelForSimdDirective(const OMPTargetParallelForSimdDirective & S)7648 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7649     const OMPTargetParallelForSimdDirective &S) {
7650   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7651     emitTargetParallelForSimdRegion(CGF, S, Action);
7652   };
7653   emitCommonOMPTargetDirective(*this, S, CodeGen);
7654 }
7655 
7656 /// Emit a helper variable and return corresponding lvalue.
mapParam(CodeGenFunction & CGF,const DeclRefExpr * Helper,const ImplicitParamDecl * PVD,CodeGenFunction::OMPPrivateScope & Privates)7657 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7658                      const ImplicitParamDecl *PVD,
7659                      CodeGenFunction::OMPPrivateScope &Privates) {
7660   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7661   Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7662 }
7663 
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the first 'if' clause that applies to the taskloop: either an
  // unmodified 'if' or one with the 'taskloop' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop. Schedule is a pointer/int pair: the int
  // discriminates grainsize (false) from num_tasks (true), the pointer holds
  // the evaluated clause expression.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Indices of the loop-bound parameters within the captured declaration's
    // parameter list. NOTE(review): assumes the first five parameters are the
    // standard task parameters -- confirm against the taskloop capture
    // layout.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Redirect the LB/UB/Stride/IsLastIter helper variables to the
    // corresponding captured parameters of the outlined task function.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      // Emit the loop itself. For simd variants the first callback applies
      // the simd loop setup before the inner loop is generated.
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    LoopScope.restoreMap();
    // Final linear-clause updates are likewise guarded by the
    // is-last-iteration flag loaded from the captured parameter.
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  // TaskGen issues the runtime taskloop call, wrapped as an inlined
  // 'taskloop' directive.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // Unless 'nogroup' was specified, the taskloop is surrounded by an
  // implicit taskgroup region.
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
7825 
EmitOMPTaskLoopDirective(const OMPTaskLoopDirective & S)7826 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7827   auto LPCRegion =
7828       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7829   EmitOMPTaskLoopBasedDirective(S);
7830 }
7831 
EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective & S)7832 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7833     const OMPTaskLoopSimdDirective &S) {
7834   auto LPCRegion =
7835       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7836   OMPLexicalScope Scope(*this, S);
7837   EmitOMPTaskLoopBasedDirective(S);
7838 }
7839 
EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective & S)7840 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7841     const OMPMasterTaskLoopDirective &S) {
7842   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7843     Action.Enter(CGF);
7844     EmitOMPTaskLoopBasedDirective(S);
7845   };
7846   auto LPCRegion =
7847       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7848   OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
7849   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7850 }
7851 
EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective & S)7852 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7853     const OMPMasterTaskLoopSimdDirective &S) {
7854   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7855     Action.Enter(CGF);
7856     EmitOMPTaskLoopBasedDirective(S);
7857   };
7858   auto LPCRegion =
7859       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7860   OMPLexicalScope Scope(*this, S);
7861   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7862 }
7863 
EmitOMPParallelMasterTaskLoopDirective(const OMPParallelMasterTaskLoopDirective & S)7864 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7865     const OMPParallelMasterTaskLoopDirective &S) {
7866   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7867     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7868                                   PrePostActionTy &Action) {
7869       Action.Enter(CGF);
7870       CGF.EmitOMPTaskLoopBasedDirective(S);
7871     };
7872     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7873     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7874                                             S.getBeginLoc());
7875   };
7876   auto LPCRegion =
7877       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7878   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7879                                  emitEmptyBoundParameters);
7880 }
7881 
EmitOMPParallelMasterTaskLoopSimdDirective(const OMPParallelMasterTaskLoopSimdDirective & S)7882 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7883     const OMPParallelMasterTaskLoopSimdDirective &S) {
7884   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7885     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7886                                   PrePostActionTy &Action) {
7887       Action.Enter(CGF);
7888       CGF.EmitOMPTaskLoopBasedDirective(S);
7889     };
7890     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7891     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7892                                             S.getBeginLoc());
7893   };
7894   auto LPCRegion =
7895       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7896   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7897                                  emitEmptyBoundParameters);
7898 }
7899 
7900 // Generate the instructions for '#pragma omp target update' directive.
EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective & S)7901 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7902     const OMPTargetUpdateDirective &S) {
7903   // If we don't have target devices, don't bother emitting the data mapping
7904   // code.
7905   if (CGM.getLangOpts().OMPTargetTriples.empty())
7906     return;
7907 
7908   // Check if we have any if clause associated with the directive.
7909   const Expr *IfCond = nullptr;
7910   if (const auto *C = S.getSingleClause<OMPIfClause>())
7911     IfCond = C->getCondition();
7912 
7913   // Check if we have any device clause associated with the directive.
7914   const Expr *Device = nullptr;
7915   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7916     Device = C->getDevice();
7917 
7918   OMPLexicalScope Scope(*this, S, OMPD_task);
7919   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7920 }
7921 
EmitOMPGenericLoopDirective(const OMPGenericLoopDirective & S)7922 void CodeGenFunction::EmitOMPGenericLoopDirective(
7923     const OMPGenericLoopDirective &S) {
7924   // Unimplemented, just inline the underlying statement for now.
7925   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7926     // Emit the loop iteration variable.
7927     const Stmt *CS =
7928         cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
7929     const auto *ForS = dyn_cast<ForStmt>(CS);
7930     if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7931       OMPPrivateScope LoopScope(CGF);
7932       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7933       (void)LoopScope.Privatize();
7934       CGF.EmitStmt(CS);
7935       LoopScope.restoreMap();
7936     } else {
7937       CGF.EmitStmt(CS);
7938     }
7939   };
7940   OMPLexicalScope Scope(*this, S, OMPD_unknown);
7941   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7942 }
7943 
EmitOMPParallelGenericLoopDirective(const OMPLoopDirective & S)7944 void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
7945     const OMPLoopDirective &S) {
7946   // Emit combined directive as if its constituent constructs are 'parallel'
7947   // and 'for'.
7948   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7949     Action.Enter(CGF);
7950     emitOMPCopyinClause(CGF, S);
7951     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
7952   };
7953   {
7954     auto LPCRegion =
7955         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7956     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
7957                                    emitEmptyBoundParameters);
7958   }
7959   // Check for outer lastprivate conditional update.
7960   checkForLastprivateConditionalUpdate(*this, S);
7961 }
7962 
EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective & S)7963 void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
7964     const OMPTeamsGenericLoopDirective &S) {
7965   // To be consistent with current behavior of 'target teams loop', emit
7966   // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
7967   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7968     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
7969   };
7970 
7971   // Emit teams region as a standalone region.
7972   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7973                                             PrePostActionTy &Action) {
7974     Action.Enter(CGF);
7975     OMPPrivateScope PrivateScope(CGF);
7976     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7977     (void)PrivateScope.Privatize();
7978     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7979                                                     CodeGenDistribute);
7980     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7981   };
7982   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
7983   emitPostUpdateForReductionClause(*this, S,
7984                                    [](CodeGenFunction &) { return nullptr; });
7985 }
7986 
7987 #ifndef NDEBUG
emitTargetTeamsLoopCodegenStatus(CodeGenFunction & CGF,std::string StatusMsg,const OMPExecutableDirective & D)7988 static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
7989                                              std::string StatusMsg,
7990                                              const OMPExecutableDirective &D) {
7991   bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
7992   if (IsDevice)
7993     StatusMsg += ": DEVICE";
7994   else
7995     StatusMsg += ": HOST";
7996   SourceLocation L = D.getBeginLoc();
7997   auto &SM = CGF.getContext().getSourceManager();
7998   PresumedLoc PLoc = SM.getPresumedLoc(L);
7999   const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8000   unsigned LineNo =
8001       PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8002   llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8003 }
8004 #endif
8005 
emitTargetTeamsGenericLoopRegionAsParallel(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsGenericLoopDirective & S)8006 static void emitTargetTeamsGenericLoopRegionAsParallel(
8007     CodeGenFunction &CGF, PrePostActionTy &Action,
8008     const OMPTargetTeamsGenericLoopDirective &S) {
8009   Action.Enter(CGF);
8010   // Emit 'teams loop' as if its constituent constructs are 'distribute,
8011   // 'parallel, and 'for'.
8012   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8013     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
8014                               S.getDistInc());
8015   };
8016 
8017   // Emit teams region as a standalone region.
8018   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8019                                                  PrePostActionTy &Action) {
8020     Action.Enter(CGF);
8021     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8022     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8023     (void)PrivateScope.Privatize();
8024     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8025         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
8026     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8027   };
8028   DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8029                   emitTargetTeamsLoopCodegenStatus(
8030                       CGF, TTL_CODEGEN_TYPE " as parallel for", S));
8031   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
8032                               CodeGenTeams);
8033   emitPostUpdateForReductionClause(CGF, S,
8034                                    [](CodeGenFunction &) { return nullptr; });
8035 }
8036 
emitTargetTeamsGenericLoopRegionAsDistribute(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsGenericLoopDirective & S)8037 static void emitTargetTeamsGenericLoopRegionAsDistribute(
8038     CodeGenFunction &CGF, PrePostActionTy &Action,
8039     const OMPTargetTeamsGenericLoopDirective &S) {
8040   Action.Enter(CGF);
8041   // Emit 'teams loop' as if its constituent construct is 'distribute'.
8042   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8043     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
8044   };
8045 
8046   // Emit teams region as a standalone region.
8047   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8048                                             PrePostActionTy &Action) {
8049     Action.Enter(CGF);
8050     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8051     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8052     (void)PrivateScope.Privatize();
8053     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8054         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
8055     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8056   };
8057   DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8058                   emitTargetTeamsLoopCodegenStatus(
8059                       CGF, TTL_CODEGEN_TYPE " as distribute", S));
8060   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
8061   emitPostUpdateForReductionClause(CGF, S,
8062                                    [](CodeGenFunction &) { return nullptr; });
8063 }
8064 
EmitOMPTargetTeamsGenericLoopDirective(const OMPTargetTeamsGenericLoopDirective & S)8065 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8066     const OMPTargetTeamsGenericLoopDirective &S) {
8067   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8068     if (S.canBeParallelFor())
8069       emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8070     else
8071       emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8072   };
8073   emitCommonOMPTargetDirective(*this, S, CodeGen);
8074 }
8075 
EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsGenericLoopDirective & S)8076 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
8077     CodeGenModule &CGM, StringRef ParentName,
8078     const OMPTargetTeamsGenericLoopDirective &S) {
8079   // Emit SPMD target parallel loop region as a standalone region.
8080   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8081     if (S.canBeParallelFor())
8082       emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8083     else
8084       emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8085   };
8086   llvm::Function *Fn;
8087   llvm::Constant *Addr;
8088   // Emit target region as a standalone region.
8089   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8090       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8091   assert(Fn && Addr &&
8092          "Target device function emission failed for 'target teams loop'.");
8093 }
8094 
emitTargetParallelGenericLoopRegion(CodeGenFunction & CGF,const OMPTargetParallelGenericLoopDirective & S,PrePostActionTy & Action)8095 static void emitTargetParallelGenericLoopRegion(
8096     CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
8097     PrePostActionTy &Action) {
8098   Action.Enter(CGF);
8099   // Emit as 'parallel for'.
8100   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8101     Action.Enter(CGF);
8102     CodeGenFunction::OMPCancelStackRAII CancelRegion(
8103         CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
8104     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
8105                                emitDispatchForLoopBounds);
8106   };
8107   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
8108                                  emitEmptyBoundParameters);
8109 }
8110 
EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelGenericLoopDirective & S)8111 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
8112     CodeGenModule &CGM, StringRef ParentName,
8113     const OMPTargetParallelGenericLoopDirective &S) {
8114   // Emit target parallel loop region as a standalone region.
8115   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8116     emitTargetParallelGenericLoopRegion(CGF, S, Action);
8117   };
8118   llvm::Function *Fn;
8119   llvm::Constant *Addr;
8120   // Emit target region as a standalone region.
8121   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8122       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8123   assert(Fn && Addr && "Target device function emission failed.");
8124 }
8125 
8126 /// Emit combined directive 'target parallel loop' as if its constituent
8127 /// constructs are 'target', 'parallel', and 'for'.
EmitOMPTargetParallelGenericLoopDirective(const OMPTargetParallelGenericLoopDirective & S)8128 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
8129     const OMPTargetParallelGenericLoopDirective &S) {
8130   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8131     emitTargetParallelGenericLoopRegion(CGF, S, Action);
8132   };
8133   emitCommonOMPTargetDirective(*this, S, CodeGen);
8134 }
8135 
EmitSimpleOMPExecutableDirective(const OMPExecutableDirective & D)8136 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8137     const OMPExecutableDirective &D) {
8138   if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
8139     EmitOMPScanDirective(*SD);
8140     return;
8141   }
8142   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8143     return;
8144   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8145     OMPPrivateScope GlobalsScope(CGF);
8146     if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
8147       // Capture global firstprivates to avoid crash.
8148       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8149         for (const Expr *Ref : C->varlists()) {
8150           const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8151           if (!DRE)
8152             continue;
8153           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8154           if (!VD || VD->hasLocalStorage())
8155             continue;
8156           if (!CGF.LocalDeclMap.count(VD)) {
8157             LValue GlobLVal = CGF.EmitLValue(Ref);
8158             GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
8159           }
8160         }
8161       }
8162     }
8163     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8164       (void)GlobalsScope.Privatize();
8165       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8166       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
8167     } else {
8168       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
8169         for (const Expr *E : LD->counters()) {
8170           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8171           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8172             LValue GlobLVal = CGF.EmitLValue(E);
8173             GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
8174           }
8175           if (isa<OMPCapturedExprDecl>(VD)) {
8176             // Emit only those that were not explicitly referenced in clauses.
8177             if (!CGF.LocalDeclMap.count(VD))
8178               CGF.EmitVarDecl(*VD);
8179           }
8180         }
8181         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8182           if (!C->getNumForLoops())
8183             continue;
8184           for (unsigned I = LD->getLoopsNumber(),
8185                         E = C->getLoopNumIterations().size();
8186                I < E; ++I) {
8187             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8188                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8189               // Emit only those that were not explicitly referenced in clauses.
8190               if (!CGF.LocalDeclMap.count(VD))
8191                 CGF.EmitVarDecl(*VD);
8192             }
8193           }
8194         }
8195       }
8196       (void)GlobalsScope.Privatize();
8197       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8198     }
8199   };
8200   if (D.getDirectiveKind() == OMPD_atomic ||
8201       D.getDirectiveKind() == OMPD_critical ||
8202       D.getDirectiveKind() == OMPD_section ||
8203       D.getDirectiveKind() == OMPD_master ||
8204       D.getDirectiveKind() == OMPD_masked ||
8205       D.getDirectiveKind() == OMPD_unroll) {
8206     EmitStmt(D.getAssociatedStmt());
8207   } else {
8208     auto LPCRegion =
8209         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
8210     OMPSimdLexicalScope Scope(*this, D);
8211     CGM.getOpenMPRuntime().emitInlinedDirective(
8212         *this,
8213         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8214                                                     : D.getDirectiveKind(),
8215         CodeGen);
8216   }
8217   // Check for outer lastprivate conditional update.
8218   checkForLastprivateConditionalUpdate(*this, D);
8219 }
8220