//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
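  /// Emit the declarations from the pre-init statements attached to the
  /// directive's clauses. Clause arguments that must be captured (e.g. the
  /// expression of a 'num_threads' clause) are pre-evaluated into such
  /// declarations.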
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

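  /// Return true if \p VD is captured by the enclosing lambda, captured
  /// statement, or block and therefore must be accessed through its capture.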
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since the declarations must be visible in the
      // statements that follow and use them, unpack the CompoundStmt they are
      // nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but they need to be
        // emitted here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

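/// Lexical scope for OpenMP directives emitted without outlining (e.g. in
/// simd-only mode) that handles correct codegen for captured expressions.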
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they
    // are not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

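// For a variably modified type such as 'int a[n][m]', the size below is
// computed at run time by walking the VLA levels and multiplying the element
// counts, i.e. n * m * sizeof(int).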
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

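/// Cast an address that was passed as a uintptr-typed argument back to an
/// address of the original type \p DstType, so the captured value can be
/// accessed with its natural type inside the outlined function.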
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}

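/// Build a canonical parameter type for \p T, recursing through references
/// and pointers and stripping variable-length arrays down to their element
/// type so that variably modified types can be passed to the outlined
/// function.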
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

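/// Emit the prologue of an outlined function for a captured statement: build
/// the argument list from the captured fields, create the llvm::Function, and
/// start emitting its body. On return, \p LocalAddrs maps arguments to the
/// local addresses of the variables they capture and \p VLASizes maps
/// arguments carrying VLA sizes to the corresponding size expressions and
/// values.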
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes can be passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy, we don't need to do anything;
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
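  // When debug info is required, the body is emitted into a separate
  // "_debug__" function that keeps the original parameter types, and a
  // wrapper with the uintptr-based signature expected by the runtime is
  // emitted below to forward its arguments to that function.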
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
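// Emits an element-by-element copy loop over an array: CopyGen is invoked
// once per element with the current destination and source element addresses,
// and the loop terminates once the destination pointer reaches the
// past-the-end element computed from the array length.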
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

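/// Emit the private copies and copy-initialization for the variables of
/// 'firstprivate' clauses. Returns true if at least one of the emitted
/// firstprivate variables is also lastprivate on this directive.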
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for a single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap the temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

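/// Emit copying of 'copyin' threadprivate variables from the master thread's
/// copies to the copies of all other threads, as sketched in the pseudo-code
/// below. Returns true if any copy was emitted, so the caller knows that the
/// trailing barrier is required.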
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

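/// Emit initialization for the private copies created for the 'lastprivate'
/// clause and remember the original variables' addresses for the final
/// copy-back. Returns true if at least one lastprivate clause is present.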
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at
      // the end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

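/// Emit private copies with reduction initializers for the variables of
/// 'reduction' clauses. For reductions with the 'task' modifier, additionally
/// emit the task reduction descriptor via the OpenMP runtime and store it in
/// the task reduction reference variable.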
EmitOMPReductionClauseInit(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope,bool ForInscan)1219 void CodeGenFunction::EmitOMPReductionClauseInit(
1220 const OMPExecutableDirective &D,
1221 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1222 if (!HaveInsertPoint())
1223 return;
1224 SmallVector<const Expr *, 4> Shareds;
1225 SmallVector<const Expr *, 4> Privates;
1226 SmallVector<const Expr *, 4> ReductionOps;
1227 SmallVector<const Expr *, 4> LHSs;
1228 SmallVector<const Expr *, 4> RHSs;
1229 OMPTaskDataTy Data;
1230 SmallVector<const Expr *, 4> TaskLHSs;
1231 SmallVector<const Expr *, 4> TaskRHSs;
1232 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1233 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1234 continue;
1235 Shareds.append(C->varlist_begin(), C->varlist_end());
1236 Privates.append(C->privates().begin(), C->privates().end());
1237 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1238 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1239 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1240 if (C->getModifier() == OMPC_REDUCTION_task) {
1241 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1242 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1243 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1244 Data.ReductionOps.append(C->reduction_ops().begin(),
1245 C->reduction_ops().end());
1246 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1247 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1248 }
1249 }
1250 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1251 unsigned Count = 0;
1252 auto *ILHS = LHSs.begin();
1253 auto *IRHS = RHSs.begin();
1254 auto *IPriv = Privates.begin();
1255 for (const Expr *IRef : Shareds) {
1256 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1257 // Emit private VarDecl with reduction init.
1258 RedCG.emitSharedOrigLValue(*this, Count);
1259 RedCG.emitAggregateType(*this, Count);
1260 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1261 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1262 RedCG.getSharedLValue(Count).getAddress(),
1263 [&Emission](CodeGenFunction &CGF) {
1264 CGF.EmitAutoVarInit(Emission);
1265 return true;
1266 });
1267 EmitAutoVarCleanups(Emission);
1268 Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

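// Illustrative example (not from this file): for a directive such as
//   #pragma omp parallel for reduction(+ : sum)
// each thread accumulates into a private copy of sum, and the finalization
// emitted below combines the partial results into the original variable
// through the runtime's reduction machinery.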
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool TeamsLoopCanBeParallel = false;
    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
      TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

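// Emits the optional post-update expressions that Sema attached to reduction
// clauses (updates of the original list items that must run after the
// reduction completes). CondGen may return a condition so the updates run only
// on selected threads; a null condition emits them unconditionally.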
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

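// Illustrative example (not from this file): under OpenMP 5.0, a conditional
// lastprivate such as
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < n; ++i)
//     if (a[i] > 0)
//       x = a[i];
// must give x the value from the sequentially last iteration that actually
// assigned it. The helper below scans the clauses that may feed such updates.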
static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates need not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditional.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

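// Illustrative example (not from this file): a combined use of the clauses
// handled below might be
//   #pragma omp parallel num_threads(4) proc_bind(close) if(parallel: cond)
// where num_threads and proc_bind are forwarded to the runtime before the
// fork, and the if clause selects between parallel and serialized execution.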
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}

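// Illustrative example (not from this file): a declaration becomes
// "allocatable" here via the allocate directive with a non-default allocator,
// e.g.
//   int v;
//   #pragma omp allocate(v) allocator(omp_high_bw_mem_alloc)
// Such variables are materialized through the OpenMP allocator API instead of
// a plain alloca; see getAddressOfLocalVariable below.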
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

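// Illustrative example (not from this file): copyin broadcasts the master
// thread's threadprivate value at the start of a parallel region, e.g.
//   static int tp;
//   #pragma omp threadprivate(tp)
//   #pragma omp parallel copyin(tp)
//   { /* every thread starts with the master's value of tp */ }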
static void emitOMPCopyinClause(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S) {
  bool Copyins = CGF.EmitOMPCopyinClause(S);
  if (Copyins) {
    // Emit an implicit barrier to synchronize threads and avoid data races on
    // propagation of the master thread's values of threadprivate variables to
    // the local instances of those variables in all other implicit threads.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  }
}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}

std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

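// Two lowering paths exist below: when the OpenMPIRBuilder is enabled, the
// region is built by OpenMPIRBuilder::createParallel and the body is emitted
// through callbacks; otherwise the legacy CGOpenMPRuntime path outlines the
// region and emits an explicit fork call.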
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // and thus calls destructors, etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs the appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}

namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

  OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
      delete;
  OMPTransformDirectiveScopeRAII &
  operator=(const OMPTransformDirectiveScopeRAII &) = delete;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace

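// Illustrative example (not from this file): for a collapsed, imperfectly
// nested loop such as
//   #pragma omp for collapse(2)
//   for (int i = 0; i < n; ++i) {
//     init(i);                       // intervening code
//     for (int j = 0; j < m; ++j)
//       body(i, j);
//   }
// emitBody() below walks the statement tree level by level, emitting the
// intervening code and descending into the next inner loop until MaxLevel
// (the number of collapsed loops) is reached.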
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}

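// Illustrative example (not from this file): the inscan handling below splits
// the loop body around the scan directive, e.g.
//   #pragma omp simd reduction(inscan, +: x)
//   for (int i = 0; i < n; ++i) {
//     x += a[i];                     // "before scan" block
//     #pragma omp scan inclusive(x)
//     b[i] = x;                      // "after scan" block
//   }
// For an inclusive scan the two blocks run in natural order; for an exclusive
// scan they are dispatched in reverse.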
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, so
  // there is no need to generate code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that the loop counter in a non-rectangular nest fits into the
    // iteration space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // We need to remember the blocks before and after the scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For the inclusive scan the
    // natural order of the blocks is used; for the exclusive clause the blocks
    // must be executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate the inscan exit block; in simd mode it is selected
    // in the codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}

/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
                     llvm::ArrayRef<llvm::Value *> Args) {
  // Append the closure context to the argument list.
  SmallVector<llvm::Value *> EffectiveArgs;
  EffectiveArgs.reserve(Args.size() + 1);
  llvm::append_range(EffectiveArgs, Args);
  EffectiveArgs.push_back(Cap.second);

  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
}

llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive with the \p Depth
  // loops nested in \p S. Put the previous pending loop-associated directive
  // onto the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}

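// For OMPCanonicalLoop nodes, Sema pre-built two helper CapturedStmts: a
// "distance" function computing the trip count of the user's loop, and a
// "loop variable" function mapping a logical iteration number back to the
// user's loop variable. Roughly, for `for (i = start; i < end; i += step)`
// the distance is ceil((end - start) / step) and the loop variable for
// logical index k is start + k * step (a sketch; the exact expressions come
// from Sema).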
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // If the OpenMPIRBuilder is not enabled, emit the loop as a regular
    // statement.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variables
  // declared by these statements.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress();
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.emitRawPointer(*this), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}

void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

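// Illustrative example (not from this file): a linear clause such as
//   int k = 0;
//   #pragma omp for linear(k : 2)
//   for (int i = 0; i < n; ++i) {
//     a[k] = b[i];
//     k += 2;
//   }
// declares that k advances by a fixed step per iteration. The init/final
// helpers below create the private copy, precompute a non-constant step, and
// write the final value back to the original variable after the loop.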
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

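// Illustrative example (not from this file): an aligned clause such as
//   #pragma omp simd aligned(p : 64)
// asserts that p points to 64-byte aligned storage; codegen lowers this to an
// alignment assumption on the pointer so the vectorizer can use aligned
// accesses. With no explicit alignment, a target-specific default is assumed.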
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress());
    } else {
      (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid
      // re-emission of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            VD, CreateMemTemp(DRE->getType(), VD->getName()));
      }
    }
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        // Emit private VarDecl with copy init.
        EmitVarDecl(*PrivateVD);
        bool IsRegistered =
            PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}

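// Illustrative example (not from this file): simdlen is a preference for the
// vector length, while safelen is a correctness bound, e.g.
//   #pragma omp simd simdlen(4) safelen(8)
// promises it is safe to execute up to 8 consecutive iterations concurrently,
// but dependences at larger distances may exist, so the loop must not be
// marked fully parallel; the helper below drops the parallel marking whenever
// a safelen clause is present.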
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk the clauses and process safelen/simdlen/order.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  if ((D.getDirectiveKind() == OMPD_simd ||
       (getLangOpts().OpenMPSimd &&
        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

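// Under OpenMP 5.0 an if clause may apply to simd, e.g.
//   #pragma omp simd if(simd: cond)
// The helper below then emits two versions of the loop body: a then-branch
// with the simd loop metadata applied and an else-branch with vectorization
// disabled, selected at run time by the condition.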
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  }
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

isSupportedByOpenMPIRBuilder(const OMPSimdDirective & S)2630 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2631 // Check for unsupported clauses
2632 for (OMPClause *C : S.clauses()) {
2633 // Currently only order, simdlen and safelen clauses are supported
2634 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2635 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2636 return false;
2637 }
2638
2639 // Check if we have a statement with the ordered directive.
2640 // Visit the statement hierarchy to find a compound statement
2641 // with a ordered directive in it.
2642 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2643 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2644 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2645 if (!SubStmt)
2646 continue;
2647 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2648 for (const Stmt *CSSubStmt : CS->children()) {
2649 if (!CSSubStmt)
2650 continue;
2651 if (isa<OMPOrderedDirective>(CSSubStmt)) {
2652 return false;
2653 }
2654 }
2655 }
2656 }
2657 }
2658 }
2659 return true;
2660 }
static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
    }
  }
  return AlignedVars;
}
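
// For illustration (hypothetical source): 'aligned(P : 64)' maps the emitted
// pointer value of P to the constant 64, while 'aligned(Q)' with no alignment
// argument maps Q to the target's default SIMD alignment for its pointee type.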

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
            GetAlignedMapping(S, CGF);
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata.
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          RValue Len =
              this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Simdlen = Val;
        }
        llvm::ConstantInt *Safelen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
          RValue Len =
              this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Safelen = Val;
        }
        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
          if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
          }
        }
        // Add simd metadata to the collapsed loop. Do not generate
        // another loop for the if clause; support for the if clause is
        // handled earlier.
        OMPBuilder.applySimd(CLI, AlignedVars,
                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
                                                  CodeGenIRBuilder);
    }
    return;
  }

  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
  OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPInterchangeDirective(
    const OMPInterchangeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

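  // For illustration (hypothetical source), '#pragma omp unroll partial(4)'
  // taken through this fallback path sets an unroll count of 4 on the next
  // emitted loop and leaves the actual unrolling to LLVM's loop unroller.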
  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}

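// Rough shape of the control flow emitted below (block names as created by
// this function):
//   omp.dispatch.cond:  test the static bounds or __kmpc_dispatch_next()
//   omp.dispatch.body:  run the (possibly simd) inner loop over [LB, UB]
//   omp.dispatch.inc:   advance LB/UB for static schedules
//   omp.dispatch.end:   exit; emitForStaticFinish() for static schedules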
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage the loop exit.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without an ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // When 'distribute' is not combined with a 'for':
        //   while (idx <= UB) { BODY; ++idx; }
        // When 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for'):
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     LoopArgs.DKind);
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // __kmpc_dispatch_init();
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  // __kmpc_dispatch_deinit();
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  OuterLoopArgs.DKind = LoopArgs.DKind;
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
  if (DynamicOrOrdered) {
    RT.emitForDispatchDeinit(*this, S.getBeginLoc());
  }
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' constructs, the increment expression
  // of distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  OuterLoopArgs.DKind = OMPD_distribute;

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
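      // E.g. (illustrative): plain 'schedule(static)' or the presence of an
      // 'ordered' clause is treated as monotonic, while
      // 'schedule(nonmonotonic: static)' and dynamic schedules without a
      // monotonic modifier are not.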
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is
              // divided into chunks that are approximately equal in size, and
              // at most one chunk is distributed to each thread. Note that
              // the size of the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         OMPD_for);
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variable.
    // ReductionCodeGen is required to correctly emit the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}

/// Copies final inscan reduction values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    Privates.append(C->privates().begin(), C->privates().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      OMPScanNumIterations,
      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
        RValue::get(OMPLast));
    LValue DestLVal = CGF.EmitLValue(OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
    CGF.EmitOMPCopy(
        PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
        cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
  }
}

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
///   for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///     buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
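///
/// For illustration, with num_iters = 4 and op '+' the prefix pass performs
/// (assuming buffer initially holds a0..a3):
///   k = 0 (pow2k = 1): buffer[3] += buffer[2]; buffer[2] += buffer[1];
///                      buffer[1] += buffer[0];
///   k = 1 (pow2k = 2): buffer[3] += buffer[1]; buffer[2] += buffer[0];
/// leaving buffer[i] = a0 + ... + ai, i.e. an inclusive prefix scan.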
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k < ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}

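// A sketch (hypothetical source) of what emitWorksharingDirective below
// special-cases: a worksharing loop with an inscan reduction, e.g.
//   #pragma omp for reduction(inscan, +:Sum)
//   for (int I = 0; I < N; ++I) {
//     Sum += A[I];              // input phase
//     #pragma omp scan inclusive(Sum)
//     B[I] = Sum;               // scan phase
//   }
// is emitted as two loop passes around the prefix reduction produced by
// emitScanBasedDirective above.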
static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}

static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
  if (S.hasCancel())
    return false;
  for (OMPClause *C : S.clauses()) {
    if (isa<OMPNowaitClause>(C))
      continue;

    if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
      if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
        return false;
      if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
        return false;
      switch (SC->getScheduleKind()) {
      case OMPC_SCHEDULE_auto:
      case OMPC_SCHEDULE_dynamic:
      case OMPC_SCHEDULE_runtime:
      case OMPC_SCHEDULE_guided:
      case OMPC_SCHEDULE_static:
        continue;
      case OMPC_SCHEDULE_unknown:
        return false;
      }
    }

    return false;
  }

  return true;
}
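
// For illustration (hypothetical source), '#pragma omp for nowait
// schedule(dynamic)' stays on the OpenMPIRBuilder path, while a schedule
// modifier such as 'schedule(monotonic: static)', a 'cancel' region, or any
// other clause falls back to the classic codegen below.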

static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
  switch (ScheduleClauseKind) {
  case OMPC_SCHEDULE_unknown:
    return llvm::omp::OMP_SCHEDULE_Default;
  case OMPC_SCHEDULE_auto:
    return llvm::omp::OMP_SCHEDULE_Auto;
  case OMPC_SCHEDULE_dynamic:
    return llvm::omp::OMP_SCHEDULE_Dynamic;
  case OMPC_SCHEDULE_guided:
    return llvm::omp::OMP_SCHEDULE_Guided;
  case OMPC_SCHEDULE_runtime:
    return llvm::omp::OMP_SCHEDULE_Runtime;
  case OMPC_SCHEDULE_static:
    return llvm::omp::OMP_SCHEDULE_Static;
  }
  llvm_unreachable("Unhandled schedule kind");
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.applyWorkshareLoop(
          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false);
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3991
3992 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3993 bool HasLastprivates = false;
3994 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3995 PrePostActionTy &) {
3996 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3997 };
3998 {
3999 auto LPCRegion =
4000 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4001 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4002 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
4003 }
4004
4005 // Emit an implicit barrier at the end.
4006 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4007 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
4008 // Check for outer lastprivate conditional update.
4009 checkForLastprivateConditionalUpdate(*this, S);
4010 }
4011
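/// Create a temporary lvalue of type Ty with the given Name and, if Init is
/// provided, store it as the initial value.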
4012 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
4013 const Twine &Name,
4014 llvm::Value *Init = nullptr) {
4015 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
4016 if (Init)
4017 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
4018 return LVal;
4019 }
4020
4021 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
4022 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4023 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4024 bool HasLastprivates = false;
4025 auto &&CodeGen = [&S, CapturedStmt, CS,
4026 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
4027 const ASTContext &C = CGF.getContext();
4028 QualType KmpInt32Ty =
4029 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
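    // A 'sections' construct is lowered as a static worksharing loop over
    // 0..<NumSections> - 1 whose IV selects the section body each thread runs.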
4030     // Emit the initializations of the helper variables.
4031 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
4032 CGF.Builder.getInt32(0));
4033 llvm::ConstantInt *GlobalUBVal = CS != nullptr
4034 ? CGF.Builder.getInt32(CS->size() - 1)
4035 : CGF.Builder.getInt32(0);
4036 LValue UB =
4037 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
4038 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
4039 CGF.Builder.getInt32(1));
4040 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
4041 CGF.Builder.getInt32(0));
4042 // Loop counter.
4043 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
4044 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4045 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4046 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4047 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4048 // Generate condition for loop.
4049 BinaryOperator *Cond = BinaryOperator::Create(
4050 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
4051 S.getBeginLoc(), FPOptionsOverride());
4052 // Increment for loop counter.
4053 UnaryOperator *Inc = UnaryOperator::Create(
4054 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
4055 S.getBeginLoc(), true, FPOptionsOverride());
4056 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4057 // Iterate through all sections and emit a switch construct:
4058 // switch (IV) {
4059 // case 0:
4060 // <SectionStmt[0]>;
4061 // break;
4062 // ...
4063 // case <NumSection> - 1:
4064 // <SectionStmt[<NumSection> - 1]>;
4065 // break;
4066 // }
4067 // .omp.sections.exit:
4068 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
4069 llvm::SwitchInst *SwitchStmt =
4070 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
4071 ExitBB, CS == nullptr ? 1 : CS->size());
4072 if (CS) {
4073 unsigned CaseNumber = 0;
4074 for (const Stmt *SubStmt : CS->children()) {
4075 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
4076 CGF.EmitBlock(CaseBB);
4077 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
4078 CGF.EmitStmt(SubStmt);
4079 CGF.EmitBranch(ExitBB);
4080 ++CaseNumber;
4081 }
4082 } else {
4083 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
4084 CGF.EmitBlock(CaseBB);
4085 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
4086 CGF.EmitStmt(CapturedStmt);
4087 CGF.EmitBranch(ExitBB);
4088 }
4089 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
4090 };
4091
4092 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4093 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
4094 // Emit implicit barrier to synchronize threads and avoid data races on
4095 // initialization of firstprivate variables and post-update of lastprivate
4096 // variables.
4097 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4098 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4099 /*ForceSimpleCall=*/true);
4100 }
4101 CGF.EmitOMPPrivateClause(S, LoopScope);
4102 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4103 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4104 CGF.EmitOMPReductionClauseInit(S, LoopScope);
4105 (void)LoopScope.Privatize();
4106 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4107 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4108
4109 // Emit static non-chunked loop.
4110 OpenMPScheduleTy ScheduleKind;
4111 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4112 CGOpenMPRuntime::StaticRTInput StaticInit(
4113 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
4114 LB.getAddress(), UB.getAddress(), ST.getAddress());
4115 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
4116 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
4117 // UB = min(UB, GlobalUB);
4118 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
4119 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4120 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
4121 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
4122 // IV = LB;
4123 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
4124 // while (idx <= UB) { BODY; ++idx; }
4125 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
4126 [](CodeGenFunction &) {});
4127 // Tell the runtime we are done.
4128 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4129 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
4130 OMPD_sections);
4131 };
4132 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
4133 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4134 // Emit post-update of the reduction variables if IsLastIter != 0.
4135 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
4136 return CGF.Builder.CreateIsNotNull(
4137 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4138 });
4139
4140 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4141 if (HasLastprivates)
4142 CGF.EmitOMPLastprivateClauseFinal(
4143 S, /*NoFinals=*/false,
4144 CGF.Builder.CreateIsNotNull(
4145 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
4146 };
4147
4148 bool HasCancel = false;
4149 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
4150 HasCancel = OSD->hasCancel();
4151 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
4152 HasCancel = OPSD->hasCancel();
4153 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
4154 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
4155 HasCancel);
4156 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4157 // clause. Otherwise the barrier will be generated by the codegen for the
4158 // directive.
4159 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4160 // Emit implicit barrier to synchronize threads and avoid data races on
4161 // initialization of firstprivate variables.
4162 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4163 OMPD_unknown);
4164 }
4165 }
4166
4167 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4168 if (CGM.getLangOpts().OpenMPIRBuilder) {
4169 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4170 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4171 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4172
4173 auto FiniCB = [this](InsertPointTy IP) {
4174 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4175 };
4176
4177 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4178 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4179 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4180 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4181 if (CS) {
4182 for (const Stmt *SubStmt : CS->children()) {
4183 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4184 InsertPointTy CodeGenIP) {
4185 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4186 *this, SubStmt, AllocaIP, CodeGenIP, "section");
4187 };
4188 SectionCBVector.push_back(SectionCB);
4189 }
4190 } else {
4191 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4192 InsertPointTy CodeGenIP) {
4193 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4194 *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
4195 };
4196 SectionCBVector.push_back(SectionCB);
4197 }
4198
4199 // Privatization callback that performs appropriate action for
4200 // shared/private/firstprivate/lastprivate/copyin/... variables.
4201 //
4202 // TODO: This defaults to shared right now.
4203 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4204 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4205 // The next line is appropriate only for variables (Val) with the
4206 // data-sharing attribute "shared".
4207 ReplVal = &Val;
4208
4209 return CodeGenIP;
4210 };
4211
4212 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4213 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4214 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4215 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4216 Builder.restoreIP(OMPBuilder.createSections(
4217 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
4218 S.getSingleClause<OMPNowaitClause>()));
4219 return;
4220 }
4221 {
4222 auto LPCRegion =
4223 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4224 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4225 EmitSections(S);
4226 }
4227 // Emit an implicit barrier at the end.
4228 if (!S.getSingleClause<OMPNowaitClause>()) {
4229 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4230 OMPD_sections);
4231 }
4232 // Check for outer lastprivate conditional update.
4233 checkForLastprivateConditionalUpdate(*this, S);
4234 }
4235
4236 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4237 if (CGM.getLangOpts().OpenMPIRBuilder) {
4238 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4239 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4240
4241 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4242 auto FiniCB = [this](InsertPointTy IP) {
4243 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4244 };
4245
4246 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4247 InsertPointTy CodeGenIP) {
4248 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4249 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4250 };
4251
4252 LexicalScope Scope(*this, S.getSourceRange());
4253 EmitStopPoint(&S);
4254 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4255
4256 return;
4257 }
4258 LexicalScope Scope(*this, S.getSourceRange());
4259 EmitStopPoint(&S);
4260 EmitStmt(S.getAssociatedStmt());
4261 }
4262
4263 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4264 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4265 llvm::SmallVector<const Expr *, 8> DestExprs;
4266 llvm::SmallVector<const Expr *, 8> SrcExprs;
4267 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4268 // Check if there are any 'copyprivate' clauses associated with this
4269 // 'single' construct.
4270 // Build a list of copyprivate variables along with helper expressions
4271 // (<source>, <destination>, <destination>=<source> expressions)
4272 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4273 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4274 DestExprs.append(C->destination_exprs().begin(),
4275 C->destination_exprs().end());
4276 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4277 AssignmentOps.append(C->assignment_ops().begin(),
4278 C->assignment_ops().end());
4279 }
4280 // Emit code for 'single' region along with 'copyprivate' clauses
4281 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4282 Action.Enter(CGF);
4283 OMPPrivateScope SingleScope(CGF);
4284 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4285 CGF.EmitOMPPrivateClause(S, SingleScope);
4286 (void)SingleScope.Privatize();
4287 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4288 };
4289 {
4290 auto LPCRegion =
4291 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4292 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4293 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4294 CopyprivateVars, DestExprs,
4295 SrcExprs, AssignmentOps);
4296 }
4297   // Emit an implicit barrier at the end (to avoid data races on firstprivate
4298   // init), unless 'nowait' was specified or a 'copyprivate' clause is present.
4299 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4300 CGM.getOpenMPRuntime().emitBarrierCall(
4301 *this, S.getBeginLoc(),
4302 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4303 }
4304 // Check for outer lastprivate conditional update.
4305 checkForLastprivateConditionalUpdate(*this, S);
4306 }
4307
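/// Emit the body of a 'master' region; emitMasterRegion wraps it in the
/// runtime check so that only the primary thread executes it.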
4308 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4309 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4310 Action.Enter(CGF);
4311 CGF.EmitStmt(S.getRawStmt());
4312 };
4313 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4314 }
4315
4316 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4317 if (CGM.getLangOpts().OpenMPIRBuilder) {
4318 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4319 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4320
4321 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4322
4323 auto FiniCB = [this](InsertPointTy IP) {
4324 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4325 };
4326
4327 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4328 InsertPointTy CodeGenIP) {
4329 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4330 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
4331 };
4332
4333 LexicalScope Scope(*this, S.getSourceRange());
4334 EmitStopPoint(&S);
4335 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4336
4337 return;
4338 }
4339 LexicalScope Scope(*this, S.getSourceRange());
4340 EmitStopPoint(&S);
4341 emitMaster(*this, S);
4342 }
4343
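/// Emit the body of a 'masked' region. Filter, when present, is the thread id
/// allowed to execute the region; without a 'filter' clause the primary
/// thread (id 0) is assumed.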
4344 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4345 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4346 Action.Enter(CGF);
4347 CGF.EmitStmt(S.getRawStmt());
4348 };
4349 Expr *Filter = nullptr;
4350 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4351 Filter = FilterClause->getThreadID();
4352 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4353 Filter);
4354 }
4355
4356 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4357 if (CGM.getLangOpts().OpenMPIRBuilder) {
4358 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4359 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4360
4361 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4362 const Expr *Filter = nullptr;
4363 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4364 Filter = FilterClause->getThreadID();
4365 llvm::Value *FilterVal = Filter
4366 ? EmitScalarExpr(Filter, CGM.Int32Ty)
4367 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4368
4369 auto FiniCB = [this](InsertPointTy IP) {
4370 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4371 };
4372
4373 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4374 InsertPointTy CodeGenIP) {
4375 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4376 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
4377 };
4378
4379 LexicalScope Scope(*this, S.getSourceRange());
4380 EmitStopPoint(&S);
4381 Builder.restoreIP(
4382 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4383
4384 return;
4385 }
4386 LexicalScope Scope(*this, S.getSourceRange());
4387 EmitStopPoint(&S);
4388 emitMasked(*this, S);
4389 }
4390
4391 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4392 if (CGM.getLangOpts().OpenMPIRBuilder) {
4393 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4394 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4395
4396 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4397 const Expr *Hint = nullptr;
4398 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4399 Hint = HintClause->getHint();
4400
4401 // TODO: This is slightly different from what's currently being done in
4402 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4403 // about typing is final.
4404 llvm::Value *HintInst = nullptr;
4405 if (Hint)
4406 HintInst =
4407 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4408
4409 auto FiniCB = [this](InsertPointTy IP) {
4410 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4411 };
4412
4413 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4414 InsertPointTy CodeGenIP) {
4415 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4416 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
4417 };
4418
4419 LexicalScope Scope(*this, S.getSourceRange());
4420 EmitStopPoint(&S);
4421 Builder.restoreIP(OMPBuilder.createCritical(
4422 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4423 HintInst));
4424
4425 return;
4426 }
4427
4428 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4429 Action.Enter(CGF);
4430 CGF.EmitStmt(S.getAssociatedStmt());
4431 };
4432 const Expr *Hint = nullptr;
4433 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4434 Hint = HintClause->getHint();
4435 LexicalScope Scope(*this, S.getSourceRange());
4436 EmitStopPoint(&S);
4437 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4438 S.getDirectiveName().getAsString(),
4439 CodeGen, S.getBeginLoc(), Hint);
4440 }
4441
4442 void CodeGenFunction::EmitOMPParallelForDirective(
4443 const OMPParallelForDirective &S) {
4444   // Emit the directive as a combined construct consisting of two implicit
4445   // directives: 'parallel' and 'for'.
4446 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4447 Action.Enter(CGF);
4448 emitOMPCopyinClause(CGF, S);
4449 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4450 };
4451 {
4452 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4453 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4454 CGCapturedStmtInfo CGSI(CR_OpenMP);
4455 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4456 OMPLoopScope LoopScope(CGF, S);
4457 return CGF.EmitScalarExpr(S.getNumIterations());
4458 };
4459 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4460 [](const OMPReductionClause *C) {
4461 return C->getModifier() == OMPC_REDUCTION_inscan;
4462 });
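    // For inscan reductions the iteration count is precomputed via
    // NumIteratorsGen: the scan buffer declarations are emitted before the
    // parallel region and finalized after it.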
4463 if (IsInscan)
4464 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4465 auto LPCRegion =
4466 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4467 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4468 emitEmptyBoundParameters);
4469 if (IsInscan)
4470 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4471 }
4472 // Check for outer lastprivate conditional update.
4473 checkForLastprivateConditionalUpdate(*this, S);
4474 }
4475
4476 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4477 const OMPParallelForSimdDirective &S) {
4478   // Emit the directive as a combined construct consisting of two implicit
4479   // directives: 'parallel' and 'for simd'.
4480 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4481 Action.Enter(CGF);
4482 emitOMPCopyinClause(CGF, S);
4483 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4484 };
4485 {
4486 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4487 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4488 CGCapturedStmtInfo CGSI(CR_OpenMP);
4489 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4490 OMPLoopScope LoopScope(CGF, S);
4491 return CGF.EmitScalarExpr(S.getNumIterations());
4492 };
4493 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4494 [](const OMPReductionClause *C) {
4495 return C->getModifier() == OMPC_REDUCTION_inscan;
4496 });
4497 if (IsInscan)
4498 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4499 auto LPCRegion =
4500 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4501 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4502 emitEmptyBoundParameters);
4503 if (IsInscan)
4504 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4505 }
4506 // Check for outer lastprivate conditional update.
4507 checkForLastprivateConditionalUpdate(*this, S);
4508 }
4509
4510 void CodeGenFunction::EmitOMPParallelMasterDirective(
4511 const OMPParallelMasterDirective &S) {
4512   // Emit the directive as a combined construct consisting of two implicit
4513   // directives: 'parallel' and 'master'.
4514 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4515 Action.Enter(CGF);
4516 OMPPrivateScope PrivateScope(CGF);
4517 emitOMPCopyinClause(CGF, S);
4518 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4519 CGF.EmitOMPPrivateClause(S, PrivateScope);
4520 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4521 (void)PrivateScope.Privatize();
4522 emitMaster(CGF, S);
4523 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4524 };
4525 {
4526 auto LPCRegion =
4527 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4528 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4529 emitEmptyBoundParameters);
4530 emitPostUpdateForReductionClause(*this, S,
4531 [](CodeGenFunction &) { return nullptr; });
4532 }
4533 // Check for outer lastprivate conditional update.
4534 checkForLastprivateConditionalUpdate(*this, S);
4535 }
4536
4537 void CodeGenFunction::EmitOMPParallelMaskedDirective(
4538 const OMPParallelMaskedDirective &S) {
4539   // Emit the directive as a combined construct consisting of two implicit
4540   // directives: 'parallel' and 'masked'.
4541 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4542 Action.Enter(CGF);
4543 OMPPrivateScope PrivateScope(CGF);
4544 emitOMPCopyinClause(CGF, S);
4545 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4546 CGF.EmitOMPPrivateClause(S, PrivateScope);
4547 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4548 (void)PrivateScope.Privatize();
4549 emitMasked(CGF, S);
4550 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4551 };
4552 {
4553 auto LPCRegion =
4554 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4555 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
4556 emitEmptyBoundParameters);
4557 emitPostUpdateForReductionClause(*this, S,
4558 [](CodeGenFunction &) { return nullptr; });
4559 }
4560 // Check for outer lastprivate conditional update.
4561 checkForLastprivateConditionalUpdate(*this, S);
4562 }
4563
4564 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4565 const OMPParallelSectionsDirective &S) {
4566   // Emit the directive as a combined construct consisting of two implicit
4567   // directives: 'parallel' and 'sections'.
4568 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4569 Action.Enter(CGF);
4570 emitOMPCopyinClause(CGF, S);
4571 CGF.EmitSections(S);
4572 };
4573 {
4574 auto LPCRegion =
4575 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4576 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4577 emitEmptyBoundParameters);
4578 }
4579 // Check for outer lastprivate conditional update.
4580 checkForLastprivateConditionalUpdate(*this, S);
4581 }
4582
4583 namespace {
4584 /// Get the list of variables declared in the context of the untied tasks.
4585 class CheckVarsEscapingUntiedTaskDeclContext final
4586 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4587 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4588
4589 public:
4590 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4591 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4592   void VisitDeclStmt(const DeclStmt *S) {
4593 if (!S)
4594 return;
4595     // Only local vars need to be privatized; static locals can be processed
4596     // as is.
4596 for (const Decl *D : S->decls()) {
4597 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4598 if (VD->hasLocalStorage())
4599 PrivateDecls.push_back(VD);
4600 }
4601 }
4602   void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4603   void VisitCapturedStmt(const CapturedStmt *) {}
4604   void VisitLambdaExpr(const LambdaExpr *) {}
4605   void VisitBlockExpr(const BlockExpr *) {}
4606   void VisitStmt(const Stmt *S) {
4607 if (!S)
4608 return;
4609 for (const Stmt *Child : S->children())
4610 if (Child)
4611 Visit(Child);
4612 }
4613
4614   /// Returns the list of variables to be privatized.
4615   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4616 };
4617 } // anonymous namespace
4618
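/// Collect the dependences from all 'depend' clauses of the directive into
/// Data.Dependences, folding 'omp_all_memory' dependences into a single
/// 'outallmemory' entry that overrides plain 'out'/'inout' dependences.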
4619 static void buildDependences(const OMPExecutableDirective &S,
4620 OMPTaskDataTy &Data) {
4621
4622   // Look for 'omp_all_memory' and, if present, add it first.
4623 bool OmpAllMemory = false;
4624 if (llvm::any_of(
4625 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4626 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4627 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4628 })) {
4629 OmpAllMemory = true;
4630 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4631 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4632 // simplify.
4633 OMPTaskDataTy::DependData &DD =
4634 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4635 /*IteratorExpr=*/nullptr);
4636 // Add a nullptr Expr to simplify the codegen in emitDependData.
4637 DD.DepExprs.push_back(nullptr);
4638 }
4639 // Add remaining dependences skipping any 'out' or 'inout' if they are
4640 // overridden by 'omp_all_memory'.
4641 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4642 OpenMPDependClauseKind Kind = C->getDependencyKind();
4643 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4644 continue;
4645 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4646 continue;
4647 OMPTaskDataTy::DependData &DD =
4648 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4649 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4650 }
4651 }
4652
4653 void CodeGenFunction::EmitOMPTaskBasedDirective(
4654 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4655 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4656 OMPTaskDataTy &Data) {
4657 // Emit outlined function for task construct.
4658 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4659 auto I = CS->getCapturedDecl()->param_begin();
4660 auto PartId = std::next(I);
4661 auto TaskT = std::next(I, 4);
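  // The captured parameters of the outlined task function are laid out as:
  // thread id, part id, a pointer to the privates block, the privates copy
  // function, and the task descriptor (TaskT); the parameter indices used
  // below rely on this layout.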
4662 // Check if the task is final
4663 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4664 // If the condition constant folds and can be elided, try to avoid emitting
4665 // the condition and the dead arm of the if/else.
4666 const Expr *Cond = Clause->getCondition();
4667 bool CondConstant;
4668 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4669 Data.Final.setInt(CondConstant);
4670 else
4671 Data.Final.setPointer(EvaluateExprAsBool(Cond));
4672 } else {
4673 // By default the task is not final.
4674 Data.Final.setInt(/*IntVal=*/false);
4675 }
4676 // Check if the task has 'priority' clause.
4677 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4678 const Expr *Prio = Clause->getPriority();
4679 Data.Priority.setInt(/*IntVal=*/true);
4680 Data.Priority.setPointer(EmitScalarConversion(
4681 EmitScalarExpr(Prio), Prio->getType(),
4682 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4683 Prio->getExprLoc()));
4684 }
4685   // The first function argument for tasks is a thread id, the second one is a
4686   // part id (0 for tied tasks, >= 0 for untied tasks).
4687 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4688 // Get list of private variables.
4689 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4690 auto IRef = C->varlist_begin();
4691 for (const Expr *IInit : C->private_copies()) {
4692 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4693 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4694 Data.PrivateVars.push_back(*IRef);
4695 Data.PrivateCopies.push_back(IInit);
4696 }
4697 ++IRef;
4698 }
4699 }
4700 EmittedAsPrivate.clear();
4701 // Get list of firstprivate variables.
4702 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4703 auto IRef = C->varlist_begin();
4704 auto IElemInitRef = C->inits().begin();
4705 for (const Expr *IInit : C->private_copies()) {
4706 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4707 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4708 Data.FirstprivateVars.push_back(*IRef);
4709 Data.FirstprivateCopies.push_back(IInit);
4710 Data.FirstprivateInits.push_back(*IElemInitRef);
4711 }
4712 ++IRef;
4713 ++IElemInitRef;
4714 }
4715 }
4716 // Get list of lastprivate variables (for taskloops).
4717 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4718 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4719 auto IRef = C->varlist_begin();
4720 auto ID = C->destination_exprs().begin();
4721 for (const Expr *IInit : C->private_copies()) {
4722 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4723 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4724 Data.LastprivateVars.push_back(*IRef);
4725 Data.LastprivateCopies.push_back(IInit);
4726 }
4727 LastprivateDstsOrigs.insert(
4728 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4729 cast<DeclRefExpr>(*IRef)));
4730 ++IRef;
4731 ++ID;
4732 }
4733 }
4734 SmallVector<const Expr *, 4> LHSs;
4735 SmallVector<const Expr *, 4> RHSs;
4736 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4737 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4738 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4739 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4740 Data.ReductionOps.append(C->reduction_ops().begin(),
4741 C->reduction_ops().end());
4742 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4743 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4744 }
4745 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4746 *this, S.getBeginLoc(), LHSs, RHSs, Data);
4747 // Build list of dependences.
4748 buildDependences(S, Data);
4749 // Get list of local vars for untied tasks.
4750 if (!Data.Tied) {
4751 CheckVarsEscapingUntiedTaskDeclContext Checker;
4752 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4753 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4754 Checker.getPrivateDecls().end());
4755 }
4756 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4757 CapturedRegion](CodeGenFunction &CGF,
4758 PrePostActionTy &Action) {
4759 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4760 std::pair<Address, Address>>
4761 UntiedLocalVars;
4762 // Set proper addresses for generated private copies.
4763 OMPPrivateScope Scope(CGF);
4764 // Generate debug info for variables present in shared clause.
4765 if (auto *DI = CGF.getDebugInfo()) {
4766 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4767 CGF.CapturedStmtInfo->getCaptureFields();
4768 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4769 if (CaptureFields.size() && ContextValue) {
4770 unsigned CharWidth = CGF.getContext().getCharWidth();
4771       // The shared variables are packed together as members of a structure, so
4772       // the address of each one can be computed by adding its offset within the
4773       // record to the base address of the record. For each shared variable, an
4774       // llvm.dbg.declare intrinsic is generated with an appropriate expression
4775       // (DIExpression).
4776 // Ex:
4777 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4778 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4779 // metadata !svar1,
4780 // metadata !DIExpression(DW_OP_deref))
4781 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4782 // metadata !svar2,
4783 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4784 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4785 const VarDecl *SharedVar = It->first;
4786 RecordDecl *CaptureRecord = It->second->getParent();
4787 const ASTRecordLayout &Layout =
4788 CGF.getContext().getASTRecordLayout(CaptureRecord);
4789 unsigned Offset =
4790 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4791 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4792 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4793 CGF.Builder, false);
4794         // Find the dbg.declare instruction we just created and update its
4795         // DIExpression to add the offset to the base address.
4796 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
4797 unsigned Offset) {
4798 SmallVector<uint64_t, 8> Ops;
4799           // Add the offset to the base address if it is non-zero.
4800 if (Offset) {
4801 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4802 Ops.push_back(Offset);
4803 }
4804 Ops.push_back(llvm::dwarf::DW_OP_deref);
4805 Declare->setExpression(llvm::DIExpression::get(Ctx, Ops));
4806 };
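          // Apply the fixup to whichever form the declare took: a debug
          // intrinsic, or a DbgVariableRecord under the new debug-info format.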
4807 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4808 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last))
4809 UpdateExpr(DDI->getContext(), DDI, Offset);
4810 // If we're emitting using the new debug info format into a block
4811 // without a terminator, the record will be "trailing".
4812 assert(!Last.isTerminator() && "unexpected terminator");
4813 if (auto *Marker =
4814 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
4815 for (llvm::DbgVariableRecord &DVR : llvm::reverse(
4816 llvm::filterDbgVars(Marker->getDbgRecordRange()))) {
4817 UpdateExpr(Last.getContext(), &DVR, Offset);
4818 break;
4819 }
4820 }
4821 }
4822 }
4823 }
4824 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4825 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4826 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4827 enum { PrivatesParam = 2, CopyFnParam = 3 };
4828 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4829 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4830 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4831 CS->getCapturedDecl()->getParam(PrivatesParam)));
4832 // Map privates.
4833 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4834 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4835 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4836 CallArgs.push_back(PrivatesPtr);
4837 ParamTypes.push_back(PrivatesPtr->getType());
4838 for (const Expr *E : Data.PrivateVars) {
4839 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4840 RawAddress PrivatePtr = CGF.CreateMemTemp(
4841 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4842 PrivatePtrs.emplace_back(VD, PrivatePtr);
4843 CallArgs.push_back(PrivatePtr.getPointer());
4844 ParamTypes.push_back(PrivatePtr.getType());
4845 }
4846 for (const Expr *E : Data.FirstprivateVars) {
4847 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4848 RawAddress PrivatePtr =
4849 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4850 ".firstpriv.ptr.addr");
4851 PrivatePtrs.emplace_back(VD, PrivatePtr);
4852 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4853 CallArgs.push_back(PrivatePtr.getPointer());
4854 ParamTypes.push_back(PrivatePtr.getType());
4855 }
4856 for (const Expr *E : Data.LastprivateVars) {
4857 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4858 RawAddress PrivatePtr =
4859 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4860 ".lastpriv.ptr.addr");
4861 PrivatePtrs.emplace_back(VD, PrivatePtr);
4862 CallArgs.push_back(PrivatePtr.getPointer());
4863 ParamTypes.push_back(PrivatePtr.getType());
4864 }
4865 for (const VarDecl *VD : Data.PrivateLocals) {
4866 QualType Ty = VD->getType().getNonReferenceType();
4867 if (VD->getType()->isLValueReferenceType())
4868 Ty = CGF.getContext().getPointerType(Ty);
4869 if (isAllocatableDecl(VD))
4870 Ty = CGF.getContext().getPointerType(Ty);
4871 RawAddress PrivatePtr = CGF.CreateMemTemp(
4872 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4873 auto Result = UntiedLocalVars.insert(
4874 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4875       // If the key already exists, update the entry in place.
4876       if (!Result.second)
4877 *Result.first = std::make_pair(
4878 VD, std::make_pair(PrivatePtr, Address::invalid()));
4879 CallArgs.push_back(PrivatePtr.getPointer());
4880 ParamTypes.push_back(PrivatePtr.getType());
4881 }
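      // Call the task's copy function to fill the *.ptr.addr temporaries with
      // the addresses of the corresponding private copies.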
4882 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4883 ParamTypes, /*isVarArg=*/false);
4884 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4885 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4886 for (const auto &Pair : LastprivateDstsOrigs) {
4887 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4888 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4889 /*RefersToEnclosingVariableOrCapture=*/
4890 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4891 Pair.second->getType(), VK_LValue,
4892 Pair.second->getExprLoc());
4893 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
4894 }
4895 for (const auto &Pair : PrivatePtrs) {
4896 Address Replacement = Address(
4897 CGF.Builder.CreateLoad(Pair.second),
4898 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4899 CGF.getContext().getDeclAlign(Pair.first));
4900 Scope.addPrivate(Pair.first, Replacement);
4901 if (auto *DI = CGF.getDebugInfo())
4902 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4903 (void)DI->EmitDeclareOfAutoVariable(
4904 Pair.first, Pair.second.getBasePointer(), CGF.Builder,
4905 /*UsePointerValue*/ true);
4906 }
4907 // Adjust mapping for internal locals by mapping actual memory instead of
4908 // a pointer to this memory.
4909 for (auto &Pair : UntiedLocalVars) {
4910 QualType VDType = Pair.first->getType().getNonReferenceType();
4911 if (Pair.first->getType()->isLValueReferenceType())
4912 VDType = CGF.getContext().getPointerType(VDType);
4913 if (isAllocatableDecl(Pair.first)) {
4914 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4915 Address Replacement(
4916 Ptr,
4917 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4918 CGF.getPointerAlign());
4919 Pair.second.first = Replacement;
4920 Ptr = CGF.Builder.CreateLoad(Replacement);
4921 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4922 CGF.getContext().getDeclAlign(Pair.first));
4923 Pair.second.second = Replacement;
4924 } else {
4925 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4926 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4927 CGF.getContext().getDeclAlign(Pair.first));
4928 Pair.second.first = Replacement;
4929 }
4930 }
4931 }
4932 if (Data.Reductions) {
4933 OMPPrivateScope FirstprivateScope(CGF);
4934 for (const auto &Pair : FirstprivatePtrs) {
4935 Address Replacement(
4936 CGF.Builder.CreateLoad(Pair.second),
4937 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4938 CGF.getContext().getDeclAlign(Pair.first));
4939 FirstprivateScope.addPrivate(Pair.first, Replacement);
4940 }
4941 (void)FirstprivateScope.Privatize();
4942 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4943 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4944 Data.ReductionCopies, Data.ReductionOps);
4945 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4946 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4947 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4948 RedCG.emitSharedOrigLValue(CGF, Cnt);
4949 RedCG.emitAggregateType(CGF, Cnt);
4950       // FIXME: This must be removed once the runtime library is fixed.
4951 // Emit required threadprivate variables for
4952 // initializer/combiner/finalizer.
4953 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4954 RedCG, Cnt);
4955 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4956 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4957 Replacement = Address(
4958 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
4959 CGF.getContext().VoidPtrTy,
4960 CGF.getContext().getPointerType(
4961 Data.ReductionCopies[Cnt]->getType()),
4962 Data.ReductionCopies[Cnt]->getExprLoc()),
4963 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4964 Replacement.getAlignment());
4965 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4966 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4967 }
4968 }
4969 // Privatize all private variables except for in_reduction items.
4970 (void)Scope.Privatize();
4971 SmallVector<const Expr *, 4> InRedVars;
4972 SmallVector<const Expr *, 4> InRedPrivs;
4973 SmallVector<const Expr *, 4> InRedOps;
4974 SmallVector<const Expr *, 4> TaskgroupDescriptors;
4975 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4976 auto IPriv = C->privates().begin();
4977 auto IRed = C->reduction_ops().begin();
4978 auto ITD = C->taskgroup_descriptors().begin();
4979 for (const Expr *Ref : C->varlists()) {
4980 InRedVars.emplace_back(Ref);
4981 InRedPrivs.emplace_back(*IPriv);
4982 InRedOps.emplace_back(*IRed);
4983 TaskgroupDescriptors.emplace_back(*ITD);
4984 std::advance(IPriv, 1);
4985 std::advance(IRed, 1);
4986 std::advance(ITD, 1);
4987 }
4988 }
4989 // Privatize in_reduction items here, because taskgroup descriptors must be
4990 // privatized earlier.
4991 OMPPrivateScope InRedScope(CGF);
4992 if (!InRedVars.empty()) {
4993 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4994 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4995 RedCG.emitSharedOrigLValue(CGF, Cnt);
4996 RedCG.emitAggregateType(CGF, Cnt);
4997       // The taskgroup descriptor variable is always implicitly firstprivate
4998       // and has already been privatized while processing the firstprivates.
4999       // FIXME: This must be removed once the runtime library is fixed.
5000 // Emit required threadprivate variables for
5001 // initializer/combiner/finalizer.
5002 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5003 RedCG, Cnt);
5004 llvm::Value *ReductionsPtr;
5005 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5006 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
5007 TRExpr->getExprLoc());
5008 } else {
5009 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5010 }
5011 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5012 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5013 Replacement = Address(
5014 CGF.EmitScalarConversion(
5015 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
5016 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5017 InRedPrivs[Cnt]->getExprLoc()),
5018 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5019 Replacement.getAlignment());
5020 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5021 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5022 }
5023 }
5024 (void)InRedScope.Privatize();
5025
5026 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
5027 UntiedLocalVars);
5028 Action.Enter(CGF);
5029 BodyGen(CGF);
5030 };
5031 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5032 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
5033 Data.NumberOfParts);
5034 OMPLexicalScope Scope(*this, S, std::nullopt,
5035 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5036 !isOpenMPSimdDirective(S.getDirectiveKind()));
5037 TaskGen(*this, OutlinedFn, Data);
5038 }
5039
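/// Create an implicit firstprivate variable of type Ty, together with the
/// original/private/init reference expressions the task codegen expects, and
/// register all of them in Data.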
5040 static ImplicitParamDecl *
5041 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
5042 QualType Ty, CapturedDecl *CD,
5043 SourceLocation Loc) {
5044 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
5045 ImplicitParamKind::Other);
5046 auto *OrigRef = DeclRefExpr::Create(
5047 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
5048 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
5049 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
5050 ImplicitParamKind::Other);
5051 auto *PrivateRef = DeclRefExpr::Create(
5052 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
5053 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
5054 QualType ElemType = C.getBaseElementType(Ty);
5055 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
5056 ImplicitParamKind::Other);
5057 auto *InitRef = DeclRefExpr::Create(
5058 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
5059 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
5060 PrivateVD->setInitStyle(VarDecl::CInit);
5061 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
5062 InitRef, /*BasePath=*/nullptr,
5063 VK_PRValue, FPOptionsOverride()));
5064 Data.FirstprivateVars.emplace_back(OrigRef);
5065 Data.FirstprivateCopies.emplace_back(PrivateRef);
5066 Data.FirstprivateInits.emplace_back(InitRef);
5067 return OrigVD;
5068 }
5069
5070 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5071 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5072 OMPTargetDataInfo &InputInfo) {
5073 // Emit outlined function for task construct.
5074 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5075 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5076 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5077 auto I = CS->getCapturedDecl()->param_begin();
5078 auto PartId = std::next(I);
5079 auto TaskT = std::next(I, 4);
5080 OMPTaskDataTy Data;
5081 // The task is not final.
5082 Data.Final.setInt(/*IntVal=*/false);
5083 // Get list of firstprivate variables.
5084 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5085 auto IRef = C->varlist_begin();
5086 auto IElemInitRef = C->inits().begin();
5087 for (auto *IInit : C->private_copies()) {
5088 Data.FirstprivateVars.push_back(*IRef);
5089 Data.FirstprivateCopies.push_back(IInit);
5090 Data.FirstprivateInits.push_back(*IElemInitRef);
5091 ++IRef;
5092 ++IElemInitRef;
5093 }
5094 }
5095 SmallVector<const Expr *, 4> LHSs;
5096 SmallVector<const Expr *, 4> RHSs;
5097 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5098 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5099 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5100 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5101 Data.ReductionOps.append(C->reduction_ops().begin(),
5102 C->reduction_ops().end());
5103 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5104 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5105 }
5106 OMPPrivateScope TargetScope(*this);
5107 VarDecl *BPVD = nullptr;
5108 VarDecl *PVD = nullptr;
5109 VarDecl *SVD = nullptr;
5110 VarDecl *MVD = nullptr;
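  // The base pointer, pointer, size and (optional) mapper arrays computed for
  // the target region are captured as implicit firstprivates so that the
  // generated task gets its own copies of them.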
5111 if (InputInfo.NumberOfTargetItems > 0) {
5112 auto *CD = CapturedDecl::Create(
5113 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5114 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5115 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5116 getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal,
5117 /*IndexTypeQuals=*/0);
5118 BPVD = createImplicitFirstprivateForType(
5119 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5120 PVD = createImplicitFirstprivateForType(
5121 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5122 QualType SizesType = getContext().getConstantArrayType(
5123 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5124 ArrSize, nullptr, ArraySizeModifier::Normal,
5125 /*IndexTypeQuals=*/0);
5126 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
5127 S.getBeginLoc());
5128 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
5129 TargetScope.addPrivate(PVD, InputInfo.PointersArray);
5130 TargetScope.addPrivate(SVD, InputInfo.SizesArray);
5131 // If there is no user-defined mapper, the mapper array will be nullptr. In
5132 // this case, we don't need to privatize it.
5133 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5134 InputInfo.MappersArray.emitRawPointer(*this))) {
5135 MVD = createImplicitFirstprivateForType(
5136 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5137 TargetScope.addPrivate(MVD, InputInfo.MappersArray);
5138 }
5139 }
5140 (void)TargetScope.Privatize();
5141 buildDependences(S, Data);
5142 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5143 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5144 // Set proper addresses for generated private copies.
5145 OMPPrivateScope Scope(CGF);
5146 if (!Data.FirstprivateVars.empty()) {
5147 enum { PrivatesParam = 2, CopyFnParam = 3 };
5148 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5149 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5150 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5151 CS->getCapturedDecl()->getParam(PrivatesParam)));
5152 // Map privates.
5153 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5154 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5155 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5156 CallArgs.push_back(PrivatesPtr);
5157 ParamTypes.push_back(PrivatesPtr->getType());
5158 for (const Expr *E : Data.FirstprivateVars) {
5159 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5160 RawAddress PrivatePtr =
5161 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
5162 ".firstpriv.ptr.addr");
5163 PrivatePtrs.emplace_back(VD, PrivatePtr);
5164 CallArgs.push_back(PrivatePtr.getPointer());
5165 ParamTypes.push_back(PrivatePtr.getType());
5166 }
5167 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5168 ParamTypes, /*isVarArg=*/false);
5169 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5170 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
5171 for (const auto &Pair : PrivatePtrs) {
5172 Address Replacement(
5173 CGF.Builder.CreateLoad(Pair.second),
5174 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5175 CGF.getContext().getDeclAlign(Pair.first));
5176 Scope.addPrivate(Pair.first, Replacement);
5177 }
5178 }
5179 CGF.processInReduction(S, Data, CGF, CS, Scope);
5180 if (InputInfo.NumberOfTargetItems > 0) {
5181 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5182 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
5183 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5184 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
5185 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5186 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
5187     // If MVD is nullptr, the mapper array is not privatized.
5188 if (MVD)
5189 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5190 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
5191 }
5192
5193 Action.Enter(CGF);
5194 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5195 auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5196 if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5197 needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) {
5198 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5199 // enclosing this target region. This will indirectly set the thread_limit
5200     // for every applicable construct within the target region.
5201 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5202 CGF, TL->getThreadLimit(), S.getBeginLoc());
5203 }
5204 BodyGen(CGF);
5205 };
5206 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5207 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
5208 Data.NumberOfParts);
5209 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5210 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5211 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5212 SourceLocation());
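  // With 'nowait' the if-condition is 1 and the task may be deferred;
  // otherwise it is 0, which makes the runtime execute the target task
  // immediately (undeferred).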
5213 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5214 SharedsTy, CapturedStruct, &IfCond, Data);
5215 }
5216
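/// Privatize the reduction and in_reduction items of a task-based directive,
/// replacing each item with the address obtained from the runtime's task
/// reduction bookkeeping (getTaskReductionItem).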
5217 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5218 OMPTaskDataTy &Data,
5219 CodeGenFunction &CGF,
5220 const CapturedStmt *CS,
5221 OMPPrivateScope &Scope) {
5222 if (Data.Reductions) {
5223 OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5224 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5225 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5226 Data.ReductionCopies, Data.ReductionOps);
5227 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5228 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5229 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5230 RedCG.emitSharedOrigLValue(CGF, Cnt);
5231 RedCG.emitAggregateType(CGF, Cnt);
5232 // FIXME: This must be removed once the runtime library is fixed.
5233 // Emit required threadprivate variables for
5234 // initializer/combiner/finalizer.
5235 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5236 RedCG, Cnt);
5237 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5238 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5239 Replacement = Address(
5240 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5241 CGF.getContext().VoidPtrTy,
5242 CGF.getContext().getPointerType(
5243 Data.ReductionCopies[Cnt]->getType()),
5244 Data.ReductionCopies[Cnt]->getExprLoc()),
5245 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5246 Replacement.getAlignment());
5247 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5248 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5249 }
5250 }
5251 (void)Scope.Privatize();
5252 SmallVector<const Expr *, 4> InRedVars;
5253 SmallVector<const Expr *, 4> InRedPrivs;
5254 SmallVector<const Expr *, 4> InRedOps;
5255 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5256 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5257 auto IPriv = C->privates().begin();
5258 auto IRed = C->reduction_ops().begin();
5259 auto ITD = C->taskgroup_descriptors().begin();
5260 for (const Expr *Ref : C->varlists()) {
5261 InRedVars.emplace_back(Ref);
5262 InRedPrivs.emplace_back(*IPriv);
5263 InRedOps.emplace_back(*IRed);
5264 TaskgroupDescriptors.emplace_back(*ITD);
5265 std::advance(IPriv, 1);
5266 std::advance(IRed, 1);
5267 std::advance(ITD, 1);
5268 }
5269 }
5270 OMPPrivateScope InRedScope(CGF);
5271 if (!InRedVars.empty()) {
5272 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5273 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5274 RedCG.emitSharedOrigLValue(CGF, Cnt);
5275 RedCG.emitAggregateType(CGF, Cnt);
5276 // FIXME: This must be removed once the runtime library is fixed.
5277 // Emit required threadprivate variables for
5278 // initializer/combiner/finalizer.
5279 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5280 RedCG, Cnt);
5281 llvm::Value *ReductionsPtr;
5282 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5283 ReductionsPtr =
5284 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
5285 } else {
5286 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5287 }
5288 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5289 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5290 Replacement = Address(
5291 CGF.EmitScalarConversion(
5292 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
5293 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5294 InRedPrivs[Cnt]->getExprLoc()),
5295 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5296 Replacement.getAlignment());
5297 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5298 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5299 }
5300 }
5301 (void)InRedScope.Privatize();
5302 }
5303
5304 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5305 // Emit outlined function for task construct.
5306 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5307 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5308 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5309 const Expr *IfCond = nullptr;
5310 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5311 if (C->getNameModifier() == OMPD_unknown ||
5312 C->getNameModifier() == OMPD_task) {
5313 IfCond = C->getCondition();
5314 break;
5315 }
5316 }
5317
5318 OMPTaskDataTy Data;
5319 // Check if we should emit tied or untied task.
5320 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5321 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5322 CGF.EmitStmt(CS->getCapturedStmt());
5323 };
5324 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5325 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5326 const OMPTaskDataTy &Data) {
5327 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5328 SharedsTy, CapturedStruct, IfCond,
5329 Data);
5330 };
5331 auto LPCRegion =
5332 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5333 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5334 }
5335
5336 void CodeGenFunction::EmitOMPTaskyieldDirective(
5337 const OMPTaskyieldDirective &S) {
5338 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5339 }
5340
5341 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5342 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5343 Expr *ME = MC ? MC->getMessageString() : nullptr;
5344 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
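// Per the OpenMP spec, severity defaults to 'fatal' when no severity clause
// is present.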
5345 bool IsFatal = false;
5346 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5347 IsFatal = true;
5348 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5349 }
5350
5351 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5352 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5353 }
5354
5355 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5356 OMPTaskDataTy Data;
5357 // Build list of dependences
5358 buildDependences(S, Data);
5359 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5360 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5361 }
5362
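/// The OpenMPIRBuilder path currently supports only 'taskgroup' directives
/// without clauses.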
5363 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5364 return T.clauses().empty();
5365 }
5366
5367 void CodeGenFunction::EmitOMPTaskgroupDirective(
5368 const OMPTaskgroupDirective &S) {
5369 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5370 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
5371 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5372 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5373 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5374 AllocaInsertPt->getIterator());
5375
5376 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5377 InsertPointTy CodeGenIP) {
5378 Builder.restoreIP(CodeGenIP);
5379 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5380 };
5381 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5382 if (!CapturedStmtInfo)
5383 CapturedStmtInfo = &CapStmtInfo;
5384 Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
5385 return;
5386 }
5387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5388 Action.Enter(CGF);
5389 if (const Expr *E = S.getReductionRef()) {
5390 SmallVector<const Expr *, 4> LHSs;
5391 SmallVector<const Expr *, 4> RHSs;
5392 OMPTaskDataTy Data;
5393 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5394 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5395 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5396 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5397 Data.ReductionOps.append(C->reduction_ops().begin(),
5398 C->reduction_ops().end());
5399 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5400 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5401 }
5402 llvm::Value *ReductionDesc =
5403 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5404 LHSs, RHSs, Data);
5405 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5406 CGF.EmitVarDecl(*VD);
5407 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5408 /*Volatile=*/false, E->getType());
5409 }
5410 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5411 };
5412 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5413 }
5414
5415 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
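// A 'flush' with a variable list performs a strong flush of the listed
// variables and carries no ordering requirement; a bare 'flush' acts as an
// acquire-release fence.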
5416 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5417 ? llvm::AtomicOrdering::NotAtomic
5418 : llvm::AtomicOrdering::AcquireRelease;
5419 CGM.getOpenMPRuntime().emitFlush(
5420 *this,
5421 [&S]() -> ArrayRef<const Expr *> {
5422 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5423 return llvm::ArrayRef(FlushClause->varlist_begin(),
5424 FlushClause->varlist_end());
5425 return std::nullopt;
5426 }(),
5427 S.getBeginLoc(), AO);
5428 }
5429
5430 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5431 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5432 LValue DOLVal = EmitLValue(DO->getDepobj());
5433 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5434 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5435 DC->getModifier());
5436 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5437 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5438 *this, Dependencies, DC->getBeginLoc());
5439 EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
5440 return;
5441 }
5442 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5443 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5444 return;
5445 }
5446 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5447 CGM.getOpenMPRuntime().emitUpdateClause(
5448 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5449 return;
5450 }
5451 }
5452
5453 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5454 if (!OMPParentLoopDirectiveForScan)
5455 return;
5456 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5457 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5458 SmallVector<const Expr *, 4> Shareds;
5459 SmallVector<const Expr *, 4> Privates;
5460 SmallVector<const Expr *, 4> LHSs;
5461 SmallVector<const Expr *, 4> RHSs;
5462 SmallVector<const Expr *, 4> ReductionOps;
5463 SmallVector<const Expr *, 4> CopyOps;
5464 SmallVector<const Expr *, 4> CopyArrayTemps;
5465 SmallVector<const Expr *, 4> CopyArrayElems;
5466 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5467 if (C->getModifier() != OMPC_REDUCTION_inscan)
5468 continue;
5469 Shareds.append(C->varlist_begin(), C->varlist_end());
5470 Privates.append(C->privates().begin(), C->privates().end());
5471 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5472 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5473 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
5474 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
5475 CopyArrayTemps.append(C->copy_array_temps().begin(),
5476 C->copy_array_temps().end());
5477 CopyArrayElems.append(C->copy_array_elems().begin(),
5478 C->copy_array_elems().end());
5479 }
5480 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5481 (getLangOpts().OpenMPSimd &&
5482 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
5483 // For the simd directive and simd-based directives in simd-only mode, use
5484 // the following codegen:
5485 // int x = 0;
5486 // #pragma omp simd reduction(inscan, +: x)
5487 // for (..) {
5488 // <first part>
5489 // #pragma omp scan inclusive(x)
5490 // <second part>
5491 // }
5492 // is transformed to:
5493 // int x = 0;
5494 // for (..) {
5495 // int x_priv = 0;
5496 // <first part>
5497 // x = x_priv + x;
5498 // x_priv = x;
5499 // <second part>
5500 // }
5501 // and
5502 // int x = 0;
5503 // #pragma omp simd reduction(inscan, +: x)
5504 // for (..) {
5505 // <first part>
5506 // #pragma omp scan exclusive(x)
5507 // <second part>
5508 // }
5509 // to
5510 // int x = 0;
5511 // for (..) {
5512 // int x_priv = 0;
5513 // <second part>
5514 // int temp = x;
5515 // x = x_priv + x;
5516 // x_priv = temp;
5517 // <first part>
5518 // }
5519 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
5520 EmitBranch(IsInclusive
5521 ? OMPScanReduce
5522 : BreakContinueStack.back().ContinueBlock.getBlock());
5523 EmitBlock(OMPScanDispatch);
5524 {
5525 // New scope for correct construction/destruction of temp variables for
5526 // exclusive scan.
5527 LexicalScope Scope(*this, S.getSourceRange());
5528 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5529 EmitBlock(OMPScanReduce);
5530 if (!IsInclusive) {
5531 // Create temp var and copy LHS value to this temp value.
5532 // TMP = LHS;
5533 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5534 const Expr *PrivateExpr = Privates[I];
5535 const Expr *TempExpr = CopyArrayTemps[I];
5536 EmitAutoVarDecl(
5537 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
5538 LValue DestLVal = EmitLValue(TempExpr);
5539 LValue SrcLVal = EmitLValue(LHSs[I]);
5540 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(),
5541 SrcLVal.getAddress(),
5542 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5543 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5544 CopyOps[I]);
5545 }
5546 }
5547 CGM.getOpenMPRuntime().emitReduction(
5548 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5549 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5550 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5551 const Expr *PrivateExpr = Privates[I];
5552 LValue DestLVal;
5553 LValue SrcLVal;
5554 if (IsInclusive) {
5555 DestLVal = EmitLValue(RHSs[I]);
5556 SrcLVal = EmitLValue(LHSs[I]);
5557 } else {
5558 const Expr *TempExpr = CopyArrayTemps[I];
5559 DestLVal = EmitLValue(RHSs[I]);
5560 SrcLVal = EmitLValue(TempExpr);
5561 }
5562 EmitOMPCopy(
5563 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
5564 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5565 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
5566 }
5567 }
5568 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5569 OMPScanExitBlock = IsInclusive
5570 ? BreakContinueStack.back().ContinueBlock.getBlock()
5571 : OMPScanReduce;
5572 EmitBlock(OMPAfterScanBlock);
5573 return;
5574 }
5575 if (!IsInclusive) {
5576 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5577 EmitBlock(OMPScanExitBlock);
5578 }
5579 if (OMPFirstScanLoop) {
5580 // Emit buffer[i] = red; at the end of the input phase.
5581 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5582 .getIterationVariable()
5583 ->IgnoreParenImpCasts();
5584 LValue IdxLVal = EmitLValue(IVExpr);
5585 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5586 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5587 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5588 const Expr *PrivateExpr = Privates[I];
5589 const Expr *OrigExpr = Shareds[I];
5590 const Expr *CopyArrayElem = CopyArrayElems[I];
5591 OpaqueValueMapping IdxMapping(
5592 *this,
5593 cast<OpaqueValueExpr>(
5594 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5595 RValue::get(IdxVal));
5596 LValue DestLVal = EmitLValue(CopyArrayElem);
5597 LValue SrcLVal = EmitLValue(OrigExpr);
5598 EmitOMPCopy(
5599 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
5600 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5601 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
5602 }
5603 }
5604 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5605 if (IsInclusive) {
5606 EmitBlock(OMPScanExitBlock);
5607 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5608 }
5609 EmitBlock(OMPScanDispatch);
5610 if (!OMPFirstScanLoop) {
5611 // Emit red = buffer[i]; at the entrance to the scan phase.
5612 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5613 .getIterationVariable()
5614 ->IgnoreParenImpCasts();
5615 LValue IdxLVal = EmitLValue(IVExpr);
5616 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5617 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5618 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5619 if (!IsInclusive) {
5620 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5621 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5622 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5623 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5624 EmitBlock(ContBB);
5625 // Use idx - 1 iteration for exclusive scan.
5626 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5627 }
5628 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5629 const Expr *PrivateExpr = Privates[I];
5630 const Expr *OrigExpr = Shareds[I];
5631 const Expr *CopyArrayElem = CopyArrayElems[I];
5632 OpaqueValueMapping IdxMapping(
5633 *this,
5634 cast<OpaqueValueExpr>(
5635 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5636 RValue::get(IdxVal));
5637 LValue SrcLVal = EmitLValue(CopyArrayElem);
5638 LValue DestLVal = EmitLValue(OrigExpr);
5639 EmitOMPCopy(
5640 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
5641 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5642 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
5643 }
5644 if (!IsInclusive) {
5645 EmitBlock(ExclusiveExitBB);
5646 }
5647 }
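// In the input loop an inclusive scan executes the code before the scan
// directive and an exclusive scan the code after it; the scan loop does the
// opposite.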
5648 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5649 : OMPAfterScanBlock);
5650 EmitBlock(OMPAfterScanBlock);
5651 }
5652
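/// Emits the body of a 'distribute' loop: sets up the bounds shared with the
/// runtime and lowers either a statically scheduled inner loop or an outer
/// loop that requests chunks from the runtime.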
5653 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5654 const CodeGenLoopTy &CodeGenLoop,
5655 Expr *IncExpr) {
5656 // Emit the loop iteration variable.
5657 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5658 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5659 EmitVarDecl(*IVDecl);
5660
5661 // Emit the iteration count variable.
5662 // If the count is not captured in a variable, Sema decided to recalculate
5663 // it on each use (e.g., because it folds to a constant).
5664 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5665 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5666 // Emit calculation of the iterations count.
5667 EmitIgnoredExpr(S.getCalcLastIteration());
5668 }
5669
5670 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5671
5672 bool HasLastprivateClause = false;
5673 // Check pre-condition.
5674 {
5675 OMPLoopScope PreInitScope(*this, S);
5676 // Skip the entire loop if we don't meet the precondition.
5677 // If the condition constant folds and can be elided, avoid emitting the
5678 // whole loop.
5679 bool CondConstant;
5680 llvm::BasicBlock *ContBlock = nullptr;
5681 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5682 if (!CondConstant)
5683 return;
5684 } else {
5685 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5686 ContBlock = createBasicBlock("omp.precond.end");
5687 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5688 getProfileCount(&S));
5689 EmitBlock(ThenBlock);
5690 incrementProfileCounter(&S);
5691 }
5692
5693 emitAlignedClause(*this, S);
5694 // Emit 'then' code.
5695 {
5696 // Emit helper vars inits.
5697
5698 LValue LB = EmitOMPHelperVar(
5699 *this, cast<DeclRefExpr>(
5700 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5701 ? S.getCombinedLowerBoundVariable()
5702 : S.getLowerBoundVariable())));
5703 LValue UB = EmitOMPHelperVar(
5704 *this, cast<DeclRefExpr>(
5705 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5706 ? S.getCombinedUpperBoundVariable()
5707 : S.getUpperBoundVariable())));
5708 LValue ST =
5709 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5710 LValue IL =
5711 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5712
5713 OMPPrivateScope LoopScope(*this);
5714 if (EmitOMPFirstprivateClause(S, LoopScope)) {
5715 // Emit implicit barrier to synchronize threads and avoid data races
5716 // on initialization of firstprivate variables and post-update of
5717 // lastprivate variables.
5718 CGM.getOpenMPRuntime().emitBarrierCall(
5719 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5720 /*ForceSimpleCall=*/true);
5721 }
5722 EmitOMPPrivateClause(S, LoopScope);
5723 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5724 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5725 !isOpenMPTeamsDirective(S.getDirectiveKind()))
5726 EmitOMPReductionClauseInit(S, LoopScope);
5727 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5728 EmitOMPPrivateLoopCounters(S, LoopScope);
5729 (void)LoopScope.Privatize();
5730 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5731 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5732
5733 // Detect the distribute schedule kind and chunk.
5734 llvm::Value *Chunk = nullptr;
5735 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5736 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5737 ScheduleKind = C->getDistScheduleKind();
5738 if (const Expr *Ch = C->getChunkSize()) {
5739 Chunk = EmitScalarExpr(Ch);
5740 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5741 S.getIterationVariable()->getType(),
5742 S.getBeginLoc());
5743 }
5744 } else {
5745 // Default behaviour for dist_schedule clause.
5746 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5747 *this, S, ScheduleKind, Chunk);
5748 }
5749 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5750 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5751
5752 // OpenMP [2.10.8, distribute Construct, Description]
5753 // If dist_schedule is specified, kind must be static. If specified,
5754 // iterations are divided into chunks of size chunk_size, chunks are
5755 // assigned to the teams of the league in a round-robin fashion in the
5756 // order of the team number. When no chunk_size is specified, the
5757 // iteration space is divided into chunks that are approximately equal
5758 // in size, and at most one chunk is distributed to each team of the
5759 // league. The size of the chunks is unspecified in this case.
5760 bool StaticChunked =
5761 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5762 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5763 if (RT.isStaticNonchunked(ScheduleKind,
5764 /* Chunked */ Chunk != nullptr) ||
5765 StaticChunked) {
5766 CGOpenMPRuntime::StaticRTInput StaticInit(
5767 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
5768 LB.getAddress(), UB.getAddress(), ST.getAddress(),
5769 StaticChunked ? Chunk : nullptr);
5770 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5771 StaticInit);
5772 JumpDest LoopExit =
5773 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5774 // UB = min(UB, GlobalUB);
5775 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5776 ? S.getCombinedEnsureUpperBound()
5777 : S.getEnsureUpperBound());
5778 // IV = LB;
5779 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5780 ? S.getCombinedInit()
5781 : S.getInit());
5782
5783 const Expr *Cond =
5784 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5785 ? S.getCombinedCond()
5786 : S.getCond();
5787
5788 if (StaticChunked)
5789 Cond = S.getCombinedDistCond();
5790
5791 // For static unchunked schedules generate:
5792 //
5793 // 1. For distribute alone, codegen
5794 // while (idx <= UB) {
5795 // BODY;
5796 // ++idx;
5797 // }
5798 //
5799 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5800 // while (idx <= UB) {
5801 // <CodeGen rest of pragma>(LB, UB);
5802 // idx += ST;
5803 // }
5804 //
5805 // For static chunked schedules generate:
5806 //
5807 // while (IV <= GlobalUB) {
5808 // <CodeGen rest of pragma>(LB, UB);
5809 // LB += ST;
5810 // UB += ST;
5811 // UB = min(UB, GlobalUB);
5812 // IV = LB;
5813 // }
5814 //
5815 emitCommonSimdLoop(
5816 *this, S,
5817 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5818 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5819 CGF.EmitOMPSimdInit(S);
5820 },
5821 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5822 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5823 CGF.EmitOMPInnerLoop(
5824 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5825 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5826 CodeGenLoop(CGF, S, LoopExit);
5827 },
5828 [&S, StaticChunked](CodeGenFunction &CGF) {
5829 if (StaticChunked) {
5830 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5831 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5832 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5833 CGF.EmitIgnoredExpr(S.getCombinedInit());
5834 }
5835 });
5836 });
5837 EmitBlock(LoopExit.getBlock());
5838 // Tell the runtime we are done.
5839 RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
5840 } else {
5841 // Emit the outer loop, which requests its work chunk [LB..UB] from
5842 // the runtime and runs the inner loop to process it.
5843 const OMPLoopArguments LoopArguments = {
5844 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
5845 Chunk};
5846 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5847 CodeGenLoop);
5848 }
5849 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5850 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5851 return CGF.Builder.CreateIsNotNull(
5852 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5853 });
5854 }
5855 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5856 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5857 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5858 EmitOMPReductionClauseFinal(S, OMPD_simd);
5859 // Emit post-update of the reduction variables if IsLastIter != 0.
5860 emitPostUpdateForReductionClause(
5861 *this, S, [IL, &S](CodeGenFunction &CGF) {
5862 return CGF.Builder.CreateIsNotNull(
5863 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5864 });
5865 }
5866 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5867 if (HasLastprivateClause) {
5868 EmitOMPLastprivateClauseFinal(
5869 S, /*NoFinals=*/false,
5870 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5871 }
5872 }
5873
5874 // We're now done with the loop, so jump to the continuation block.
5875 if (ContBlock) {
5876 EmitBranch(ContBlock);
5877 EmitBlock(ContBlock, true);
5878 }
5879 }
5880 }
5881
5882 void CodeGenFunction::EmitOMPDistributeDirective(
5883 const OMPDistributeDirective &S) {
5884 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5885 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5886 };
5887 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5888 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5889 }
5890
5891 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5892 const CapturedStmt *S,
5893 SourceLocation Loc) {
5894 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5895 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5896 CGF.CapturedStmtInfo = &CapStmtInfo;
5897 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5898 Fn->setDoesNotRecurse();
5899 return Fn;
5900 }
5901
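/// Lowers a single 'depend'/'doacross' clause of an 'ordered' directive via
/// the OpenMPIRBuilder: the loop iteration values are materialized as 64-bit
/// integers and handed to the doacross runtime as a source or a sink.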
5902 template <typename T>
5903 static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5904 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5905 llvm::OpenMPIRBuilder &OMPBuilder) {
5906
5907 unsigned NumLoops = C->getNumLoops();
5908 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5909 /*DestWidth=*/64, /*Signed=*/1);
5910 llvm::SmallVector<llvm::Value *> StoreValues;
5911 for (unsigned I = 0; I < NumLoops; I++) {
5912 const Expr *CounterVal = C->getLoopData(I);
5913 assert(CounterVal);
5914 llvm::Value *StoreValue = CGF.EmitScalarConversion(
5915 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5916 CounterVal->getExprLoc());
5917 StoreValues.emplace_back(StoreValue);
5918 }
5919 OMPDoacrossKind<T> ODK;
5920 bool IsDependSource = ODK.isSource(C);
5921 CGF.Builder.restoreIP(
5922 OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
5923 StoreValues, ".cnt.addr", IsDependSource));
5924 }
5925
5926 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5927 if (CGM.getLangOpts().OpenMPIRBuilder) {
5928 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5929 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5930
5931 if (S.hasClausesOfKind<OMPDependClause>() ||
5932 S.hasClausesOfKind<OMPDoacrossClause>()) {
5933 // The ordered directive with depend clause.
5934 assert(!S.hasAssociatedStmt() && "No associated statement must be in "
5935 "ordered depend|doacross construct.");
5936 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5937 AllocaInsertPt->getIterator());
5938 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5939 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5940 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5941 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5942 } else {
5943 // The ordered directive with threads or simd clause, or without clause.
5944 // Without clause, it behaves as if the threads clause is specified.
5945 const auto *C = S.getSingleClause<OMPSIMDClause>();
5946
5947 auto FiniCB = [this](InsertPointTy IP) {
5948 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
5949 };
5950
5951 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5952 InsertPointTy CodeGenIP) {
5953 Builder.restoreIP(CodeGenIP);
5954
5955 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5956 if (C) {
5957 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
5958 Builder, /*CreateBranch=*/false, ".ordered.after");
5959 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5960 GenerateOpenMPCapturedVars(*CS, CapturedVars);
5961 llvm::Function *OutlinedFn =
5962 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5963 assert(S.getBeginLoc().isValid() &&
5964 "Outlined function call location must be valid.");
5965 ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
5966 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
5967 OutlinedFn, CapturedVars);
5968 } else {
5969 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5970 *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
5971 }
5972 };
5973
5974 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5975 Builder.restoreIP(
5976 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
5977 }
5978 return;
5979 }
5980
5981 if (S.hasClausesOfKind<OMPDependClause>()) {
5982 assert(!S.hasAssociatedStmt() &&
5983 "No associated statement must be in ordered depend construct.");
5984 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5985 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5986 return;
5987 }
5988 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
5989 assert(!S.hasAssociatedStmt() &&
5990 "No associated statement must be in ordered doacross construct.");
5991 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5992 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5993 return;
5994 }
5995 const auto *C = S.getSingleClause<OMPSIMDClause>();
5996 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5997 PrePostActionTy &Action) {
5998 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5999 if (C) {
6000 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6001 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6002 llvm::Function *OutlinedFn =
6003 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
6004 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
6005 OutlinedFn, CapturedVars);
6006 } else {
6007 Action.Enter(CGF);
6008 CGF.EmitStmt(CS->getCapturedStmt());
6009 }
6010 };
6011 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6012 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
6013 }
6014
6015 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6016 QualType SrcType, QualType DestType,
6017 SourceLocation Loc) {
6018 assert(CGF.hasScalarEvaluationKind(DestType) &&
6019 "DestType must have scalar evaluation kind.");
6020 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6021 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
6022 DestType, Loc)
6023 : CGF.EmitComplexToScalarConversion(
6024 Val.getComplexVal(), SrcType, DestType, Loc);
6025 }
6026
6027 static CodeGenFunction::ComplexPairTy
6028 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6029 QualType DestType, SourceLocation Loc) {
6030 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6031 "DestType must have complex evaluation kind.");
6032 CodeGenFunction::ComplexPairTy ComplexVal;
6033 if (Val.isScalar()) {
6034 // Convert the input element to the element type of the complex.
6035 QualType DestElementType =
6036 DestType->castAs<ComplexType>()->getElementType();
6037 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6038 Val.getScalarVal(), SrcType, DestElementType, Loc);
6039 ComplexVal = CodeGenFunction::ComplexPairTy(
6040 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
6041 } else {
6042 assert(Val.isComplex() && "Must be a scalar or complex.");
6043 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6044 QualType DestElementType =
6045 DestType->castAs<ComplexType>()->getElementType();
6046 ComplexVal.first = CGF.EmitScalarConversion(
6047 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
6048 ComplexVal.second = CGF.EmitScalarConversion(
6049 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
6050 }
6051 return ComplexVal;
6052 }
6053
6054 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6055 LValue LVal, RValue RVal) {
6056 if (LVal.isGlobalReg())
6057 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
6058 else
6059 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
6060 }
6061
6062 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6063 llvm::AtomicOrdering AO, LValue LVal,
6064 SourceLocation Loc) {
6065 if (LVal.isGlobalReg())
6066 return CGF.EmitLoadOfLValue(LVal, Loc);
6067 return CGF.EmitAtomicLoad(
6068 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
6069 LVal.isVolatile());
6070 }
6071
6072 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6073 QualType RValTy, SourceLocation Loc) {
6074 switch (getEvaluationKind(LVal.getType())) {
6075 case TEK_Scalar:
6076 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
6077 *this, RVal, RValTy, LVal.getType(), Loc)),
6078 LVal);
6079 break;
6080 case TEK_Complex:
6081 EmitStoreOfComplex(
6082 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
6083 /*isInit=*/false);
6084 break;
6085 case TEK_Aggregate:
6086 llvm_unreachable("Must be a scalar or complex.");
6087 }
6088 }
6089
6090 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6091 const Expr *X, const Expr *V,
6092 SourceLocation Loc) {
6093 // v = x;
6094 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6095 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6096 LValue XLValue = CGF.EmitLValue(X);
6097 LValue VLValue = CGF.EmitLValue(V);
6098 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
6099 // OpenMP, 2.17.7, atomic Construct
6100 // If the read or capture clause is specified and the acquire, acq_rel, or
6101 // seq_cst clause is specified then the strong flush on exit from the atomic
6102 // operation is also an acquire flush.
6103 switch (AO) {
6104 case llvm::AtomicOrdering::Acquire:
6105 case llvm::AtomicOrdering::AcquireRelease:
6106 case llvm::AtomicOrdering::SequentiallyConsistent:
6107 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6108 llvm::AtomicOrdering::Acquire);
6109 break;
6110 case llvm::AtomicOrdering::Monotonic:
6111 case llvm::AtomicOrdering::Release:
6112 break;
6113 case llvm::AtomicOrdering::NotAtomic:
6114 case llvm::AtomicOrdering::Unordered:
6115 llvm_unreachable("Unexpected ordering.");
6116 }
6117 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6118 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6119 }
6120
6121 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6122 llvm::AtomicOrdering AO, const Expr *X,
6123 const Expr *E, SourceLocation Loc) {
6124 // x = expr;
6125 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6126 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6127 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6128 // OpenMP, 2.17.7, atomic Construct
6129 // If the write, update, or capture clause is specified and the release,
6130 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6131 // the atomic operation is also a release flush.
6132 switch (AO) {
6133 case llvm::AtomicOrdering::Release:
6134 case llvm::AtomicOrdering::AcquireRelease:
6135 case llvm::AtomicOrdering::SequentiallyConsistent:
6136 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6137 llvm::AtomicOrdering::Release);
6138 break;
6139 case llvm::AtomicOrdering::Acquire:
6140 case llvm::AtomicOrdering::Monotonic:
6141 break;
6142 case llvm::AtomicOrdering::NotAtomic:
6143 case llvm::AtomicOrdering::Unordered:
6144 llvm_unreachable("Unexpected ordering.");
6145 }
6146 }
6147
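/// Tries to lower an atomic update to a single 'atomicrmw' instruction.
/// Returns {false, nullptr} if the operation cannot be expressed that way, in
/// which case the caller falls back to a compare-and-swap based update.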
6148 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6149 RValue Update,
6150 BinaryOperatorKind BO,
6151 llvm::AtomicOrdering AO,
6152 bool IsXLHSInRHSPart) {
6153 ASTContext &Context = CGF.getContext();
6154 // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
6155 // for the 'x' expression is simple, and atomics are supported for the given
6156 // type on the target platform.
6157 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6158 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
6159 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
6160 !Context.getTargetInfo().hasBuiltinAtomic(
6161 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
6162 return std::make_pair(false, RValue::get(nullptr));
6163
6164 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6165 if (T->isIntegerTy())
6166 return true;
6167
6168 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6169 return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
6170
6171 return false;
6172 };
6173
6174 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6175 !CheckAtomicSupport(X.getAddress().getElementType(), BO))
6176 return std::make_pair(false, RValue::get(nullptr));
6177
6178 bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
6179 llvm::AtomicRMWInst::BinOp RMWOp;
6180 switch (BO) {
6181 case BO_Add:
6182 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6183 break;
6184 case BO_Sub:
6185 if (!IsXLHSInRHSPart)
6186 return std::make_pair(false, RValue::get(nullptr));
6187 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6188 break;
6189 case BO_And:
6190 RMWOp = llvm::AtomicRMWInst::And;
6191 break;
6192 case BO_Or:
6193 RMWOp = llvm::AtomicRMWInst::Or;
6194 break;
6195 case BO_Xor:
6196 RMWOp = llvm::AtomicRMWInst::Xor;
6197 break;
6198 case BO_LT:
6199 if (IsInteger)
6200 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6201 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6202 : llvm::AtomicRMWInst::Max)
6203 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6204 : llvm::AtomicRMWInst::UMax);
6205 else
6206 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6207 : llvm::AtomicRMWInst::FMax;
6208 break;
6209 case BO_GT:
6210 if (IsInteger)
6211 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6212 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6213 : llvm::AtomicRMWInst::Min)
6214 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6215 : llvm::AtomicRMWInst::UMin);
6216 else
6217 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6218 : llvm::AtomicRMWInst::FMin;
6219 break;
6220 case BO_Assign:
6221 RMWOp = llvm::AtomicRMWInst::Xchg;
6222 break;
6223 case BO_Mul:
6224 case BO_Div:
6225 case BO_Rem:
6226 case BO_Shl:
6227 case BO_Shr:
6228 case BO_LAnd:
6229 case BO_LOr:
6230 return std::make_pair(false, RValue::get(nullptr));
6231 case BO_PtrMemD:
6232 case BO_PtrMemI:
6233 case BO_LE:
6234 case BO_GE:
6235 case BO_EQ:
6236 case BO_NE:
6237 case BO_Cmp:
6238 case BO_AddAssign:
6239 case BO_SubAssign:
6240 case BO_AndAssign:
6241 case BO_OrAssign:
6242 case BO_XorAssign:
6243 case BO_MulAssign:
6244 case BO_DivAssign:
6245 case BO_RemAssign:
6246 case BO_ShlAssign:
6247 case BO_ShrAssign:
6248 case BO_Comma:
6249 llvm_unreachable("Unsupported atomic update operation");
6250 }
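// Constant update operands may not match the width (or, for floating-point
// 'x', the type) of 'x'; cast them to the element type before forming the
// atomicrmw.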
6251 llvm::Value *UpdateVal = Update.getScalarVal();
6252 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
6253 if (IsInteger)
6254 UpdateVal = CGF.Builder.CreateIntCast(
6255 IC, X.getAddress().getElementType(),
6256 X.getType()->hasSignedIntegerRepresentation());
6257 else
6258 UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
6259 X.getAddress().getElementType());
6260 }
6261 llvm::Value *Res =
6262 CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
6263 return std::make_pair(true, RValue::get(Res));
6264 }
6265
6266 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6267 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6268 llvm::AtomicOrdering AO, SourceLocation Loc,
6269 const llvm::function_ref<RValue(RValue)> CommonGen) {
6270 // Update expressions are allowed to have the following forms:
6271 // x binop= expr; -> xrval binop expr;
6272 // x++, ++x -> xrval + 1;
6273 // x--, --x -> xrval - 1;
6274 // x = x binop expr; -> xrval binop expr;
6275 // x = expr Op x; -> expr binop xrval;
6276 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6277 if (!Res.first) {
6278 if (X.isGlobalReg()) {
6279 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6280 // 'xrval'.
6281 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6282 } else {
6283 // Perform compare-and-swap procedure.
6284 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6285 }
6286 }
6287 return Res;
6288 }
6289
6290 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6291 llvm::AtomicOrdering AO, const Expr *X,
6292 const Expr *E, const Expr *UE,
6293 bool IsXLHSInRHSPart, SourceLocation Loc) {
6294 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6295 "Update expr in 'atomic update' must be a binary operator.");
6296 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6297 // Update expressions are allowed to have the following forms:
6298 // x binop= expr; -> xrval binop expr;
6299 // x++, ++x -> xrval + 1;
6300 // x--, --x -> xrval - 1;
6301 // x = x binop expr; -> xrval binop expr;
6302 // x = expr Op x; -> expr binop xrval;
6303 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6304 LValue XLValue = CGF.EmitLValue(X);
6305 RValue ExprRValue = CGF.EmitAnyExpr(E);
6306 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6307 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6308 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6309 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6310 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6311 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6312 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6313 return CGF.EmitAnyExpr(UE);
6314 };
6315 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6316 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6317 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6318 // OpenMP, 2.17.7, atomic Construct
6319 // If the write, update, or capture clause is specified and the release,
6320 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6321 // the atomic operation is also a release flush.
6322 switch (AO) {
6323 case llvm::AtomicOrdering::Release:
6324 case llvm::AtomicOrdering::AcquireRelease:
6325 case llvm::AtomicOrdering::SequentiallyConsistent:
6326 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6327 llvm::AtomicOrdering::Release);
6328 break;
6329 case llvm::AtomicOrdering::Acquire:
6330 case llvm::AtomicOrdering::Monotonic:
6331 break;
6332 case llvm::AtomicOrdering::NotAtomic:
6333 case llvm::AtomicOrdering::Unordered:
6334 llvm_unreachable("Unexpected ordering.");
6335 }
6336 }
6337
6338 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6339 QualType SourceType, QualType ResType,
6340 SourceLocation Loc) {
6341 switch (CGF.getEvaluationKind(ResType)) {
6342 case TEK_Scalar:
6343 return RValue::get(
6344 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6345 case TEK_Complex: {
6346 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6347 return RValue::getComplex(Res.first, Res.second);
6348 }
6349 case TEK_Aggregate:
6350 break;
6351 }
6352 llvm_unreachable("Must be a scalar or complex.");
6353 }
6354
6355 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6356 llvm::AtomicOrdering AO,
6357 bool IsPostfixUpdate, const Expr *V,
6358 const Expr *X, const Expr *E,
6359 const Expr *UE, bool IsXLHSInRHSPart,
6360 SourceLocation Loc) {
6361 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6362 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6363 RValue NewVVal;
6364 LValue VLValue = CGF.EmitLValue(V);
6365 LValue XLValue = CGF.EmitLValue(X);
6366 RValue ExprRValue = CGF.EmitAnyExpr(E);
6367 QualType NewVValType;
6368 if (UE) {
6369 // 'x' is updated with some additional value.
6370 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6371 "Update expr in 'atomic capture' must be a binary operator.");
6372 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6373 // Update expressions are allowed to have the following forms:
6374 // x binop= expr; -> xrval binop expr;
6375 // x++, ++x -> xrval + 1;
6376 // x--, --x -> xrval - 1;
6377 // x = x binop expr; -> xrval binop expr;
6378 // x = expr Op x; -> expr binop xrval;
6379 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6380 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6381 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6382 NewVValType = XRValExpr->getType();
6383 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6384 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6385 IsPostfixUpdate](RValue XRValue) {
6386 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6387 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6388 RValue Res = CGF.EmitAnyExpr(UE);
6389 NewVVal = IsPostfixUpdate ? XRValue : Res;
6390 return Res;
6391 };
6392 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6393 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6394 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6395 if (Res.first) {
6396 // 'atomicrmw' instruction was generated.
6397 if (IsPostfixUpdate) {
6398 // Use old value from 'atomicrmw'.
6399 NewVVal = Res.second;
6400 } else {
6401 // 'atomicrmw' does not provide the new value, so evaluate it using the
6402 // old value of 'x'.
6403 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6404 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6405 NewVVal = CGF.EmitAnyExpr(UE);
6406 }
6407 }
6408 } else {
6409 // 'x' is simply rewritten with some 'expr'.
6410 NewVValType = X->getType().getNonReferenceType();
6411 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6412 X->getType().getNonReferenceType(), Loc);
6413 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6414 NewVVal = XRValue;
6415 return ExprRValue;
6416 };
6417 // Try to perform atomicrmw xchg, otherwise simple exchange.
6418 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6419 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6420 Loc, Gen);
6421 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6422 if (Res.first) {
6423 // 'atomicrmw' instruction was generated.
6424 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6425 }
6426 }
6427 // Emit post-update store to 'v' of old/new 'x' value.
6428 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6429 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6430 // OpenMP 5.1 removes the required flush for the capture clause.
6431 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6432 // OpenMP, 2.17.7, atomic Construct
6433 // If the write, update, or capture clause is specified and the release,
6434 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6435 // the atomic operation is also a release flush.
6436 // If the read or capture clause is specified and the acquire, acq_rel, or
6437 // seq_cst clause is specified then the strong flush on exit from the atomic
6438 // operation is also an acquire flush.
6439 switch (AO) {
6440 case llvm::AtomicOrdering::Release:
6441 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6442 llvm::AtomicOrdering::Release);
6443 break;
6444 case llvm::AtomicOrdering::Acquire:
6445 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6446 llvm::AtomicOrdering::Acquire);
6447 break;
6448 case llvm::AtomicOrdering::AcquireRelease:
6449 case llvm::AtomicOrdering::SequentiallyConsistent:
6450 CGF.CGM.getOpenMPRuntime().emitFlush(
6451 CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
6452 break;
6453 case llvm::AtomicOrdering::Monotonic:
6454 break;
6455 case llvm::AtomicOrdering::NotAtomic:
6456 case llvm::AtomicOrdering::Unordered:
6457 llvm_unreachable("Unexpected ordering.");
6458 }
6459 }
6460 }
6461
6462 static void emitOMPAtomicCompareExpr(
6463 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
6464 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
6465 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
6466 SourceLocation Loc) {
6467 llvm::OpenMPIRBuilder &OMPBuilder =
6468 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6469
6470 OMPAtomicCompareOp Op;
6471 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6472 switch (cast<BinaryOperator>(CE)->getOpcode()) {
6473 case BO_EQ:
6474 Op = OMPAtomicCompareOp::EQ;
6475 break;
6476 case BO_LT:
6477 Op = OMPAtomicCompareOp::MIN;
6478 break;
6479 case BO_GT:
6480 Op = OMPAtomicCompareOp::MAX;
6481 break;
6482 default:
6483 llvm_unreachable("unsupported atomic compare binary operator");
6484 }
6485
6486 LValue XLVal = CGF.EmitLValue(X);
6487 Address XAddr = XLVal.getAddress();
6488
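// The 'e' and 'd' operands may have been implicitly converted to the type of
// 'x'; emit them with their as-written type and convert explicitly only if it
// still differs.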
6489 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6490 if (X->getType() == E->getType())
6491 return CGF.EmitScalarExpr(E);
6492 const Expr *NewE = E->IgnoreImplicitAsWritten();
6493 llvm::Value *V = CGF.EmitScalarExpr(NewE);
6494 if (NewE->getType() == X->getType())
6495 return V;
6496 return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
6497 };
6498
6499 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6500 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6501 if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
6502 EVal = CGF.Builder.CreateIntCast(
6503 CI, XLVal.getAddress().getElementType(),
6504 E->getType()->hasSignedIntegerRepresentation());
6505 if (DVal)
6506 if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
6507 DVal = CGF.Builder.CreateIntCast(
6508 CI, XLVal.getAddress().getElementType(),
6509 D->getType()->hasSignedIntegerRepresentation());
6510
6511 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6512 XAddr.emitRawPointer(CGF), XAddr.getElementType(),
6513 X->getType()->hasSignedIntegerRepresentation(),
6514 X->getType().isVolatileQualified()};
6515 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6516 if (V) {
6517 LValue LV = CGF.EmitLValue(V);
6518 Address Addr = LV.getAddress();
6519 VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
6520 V->getType()->hasSignedIntegerRepresentation(),
6521 V->getType().isVolatileQualified()};
6522 }
6523 if (R) {
6524 LValue LV = CGF.EmitLValue(R);
6525 Address Addr = LV.getAddress();
6526 ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
6527 R->getType()->hasSignedIntegerRepresentation(),
6528 R->getType().isVolatileQualified()};
6529 }
6530
6531 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
6532 // No 'fail' clause was specified on the
6533 // '#pragma omp atomic compare' construct.
6534 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6535 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6536 IsPostfixUpdate, IsFailOnly));
6537 } else
6538 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6539 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6540 IsPostfixUpdate, IsFailOnly, FailAO));
6541 }
6542
6543 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6544 llvm::AtomicOrdering AO,
6545 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6546 const Expr *X, const Expr *V, const Expr *R,
6547 const Expr *E, const Expr *UE, const Expr *D,
6548 const Expr *CE, bool IsXLHSInRHSPart,
6549 bool IsFailOnly, SourceLocation Loc) {
6550 switch (Kind) {
6551 case OMPC_read:
6552 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6553 break;
6554 case OMPC_write:
6555 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6556 break;
6557 case OMPC_unknown:
6558 case OMPC_update:
6559 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6560 break;
6561 case OMPC_capture:
6562 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6563 IsXLHSInRHSPart, Loc);
6564 break;
6565 case OMPC_compare: {
6566 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6567 IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6568 break;
6569 }
6570 default:
6571 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6572 }
6573 }
6574
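// Pick the memory ordering for the construct: an explicit
// seq_cst/acq_rel/acquire/release/relaxed clause wins; otherwise the default
// ordering (e.g. from 'requires atomic_default_mem_order(acq_rel)') applies,
// restricted below so that acq_rel becomes 'release' for write/update forms
// and 'acquire' for read forms.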
6575 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6576 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6577 // Memory ordering for the 'fail' clause, if present.
6578 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6579 bool MemOrderingSpecified = false;
6580 if (S.getSingleClause<OMPSeqCstClause>()) {
6581 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6582 MemOrderingSpecified = true;
6583 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6584 AO = llvm::AtomicOrdering::AcquireRelease;
6585 MemOrderingSpecified = true;
6586 } else if (S.getSingleClause<OMPAcquireClause>()) {
6587 AO = llvm::AtomicOrdering::Acquire;
6588 MemOrderingSpecified = true;
6589 } else if (S.getSingleClause<OMPReleaseClause>()) {
6590 AO = llvm::AtomicOrdering::Release;
6591 MemOrderingSpecified = true;
6592 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6593 AO = llvm::AtomicOrdering::Monotonic;
6594 MemOrderingSpecified = true;
6595 }
6596 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6597 OpenMPClauseKind Kind = OMPC_unknown;
6598 for (const OMPClause *C : S.clauses()) {
6599 // Find the first clause (skip the seq_cst|acq_rel|acquire|release|relaxed
6600 // and hint clauses, if they come first).
6601 OpenMPClauseKind K = C->getClauseKind();
6602 // TODO: 'weak' clause support is not implemented yet; emit nothing.
6603 if (K == OMPC_weak)
6604 return;
6605 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6606 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6607 continue;
6608 Kind = K;
6609 KindsEncountered.insert(K);
6610 }
6611 // We just need to correct Kind here. No need to set a bool saying it is
6612 // actually compare capture because we can tell from whether V and R are
6613 // nullptr.
6614 if (KindsEncountered.contains(OMPC_compare) &&
6615 KindsEncountered.contains(OMPC_capture))
6616 Kind = OMPC_compare;
6617 if (!MemOrderingSpecified) {
6618 llvm::AtomicOrdering DefaultOrder =
6619 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6620 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6621 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6622 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6623 Kind == OMPC_capture)) {
6624 AO = DefaultOrder;
6625 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6626 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6627 AO = llvm::AtomicOrdering::Release;
6628 } else if (Kind == OMPC_read) {
6629 assert(Kind == OMPC_read && "Unexpected atomic kind.");
6630 AO = llvm::AtomicOrdering::Acquire;
6631 }
6632 }
6633 }
6634
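// A 'fail' clause is only meaningful on 'atomic compare': it specifies the
// memory ordering for the comparison-failure path of the generated
// compare-and-exchange, e.g. '#pragma omp atomic compare fail(acquire)'.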
6635 if (KindsEncountered.contains(OMPC_compare) &&
6636 KindsEncountered.contains(OMPC_fail)) {
6637 Kind = OMPC_compare;
6638 const auto *FailClause = S.getSingleClause<OMPFailClause>();
6639 if (FailClause) {
6640 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
6641 if (FailParameter == llvm::omp::OMPC_relaxed)
6642 FailAO = llvm::AtomicOrdering::Monotonic;
6643 else if (FailParameter == llvm::omp::OMPC_acquire)
6644 FailAO = llvm::AtomicOrdering::Acquire;
6645 else if (FailParameter == llvm::omp::OMPC_seq_cst)
6646 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
6647 }
6648 }
6649
6650 LexicalScope Scope(*this, S.getSourceRange());
6651 EmitStopPoint(S.getAssociatedStmt());
6652 emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
6653 S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
6654 S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
6655 S.isFailOnly(), S.getBeginLoc());
6656 }
6657
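// Common codegen for 'target' directives, e.g.
//   #pragma omp target device(0) if(n > 1024)
// When compiling for the device, the region is emitted inline; on the host it
// is outlined into an offload kernel and launched through the OpenMP
// offloading runtime.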
6658 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6659 const OMPExecutableDirective &S,
6660 const RegionCodeGenTy &CodeGen) {
6661 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6662 CodeGenModule &CGM = CGF.CGM;
6663
6664 // On the device, emit this construct as inlined code.
6665 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6666 OMPLexicalScope Scope(CGF, S, OMPD_target);
6667 CGM.getOpenMPRuntime().emitInlinedDirective(
6668 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6669 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6670 });
6671 return;
6672 }
6673
6674 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6675 llvm::Function *Fn = nullptr;
6676 llvm::Constant *FnID = nullptr;
6677
6678 const Expr *IfCond = nullptr;
6679 // Check for the (at most one) 'if' clause associated with the target region.
6680 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6681 if (C->getNameModifier() == OMPD_unknown ||
6682 C->getNameModifier() == OMPD_target) {
6683 IfCond = C->getCondition();
6684 break;
6685 }
6686 }
6687
6688 // Check if we have any device clause associated with the directive.
6689 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6690 nullptr, OMPC_DEVICE_unknown);
6691 if (auto *C = S.getSingleClause<OMPDeviceClause>())
6692 Device.setPointerAndInt(C->getDevice(), C->getModifier());
6693
6694 // Check if we have an 'if' clause whose condition always evaluates to false,
6695 // or if no offload targets were specified. If so, the target region is not
6696 // an offload entry point.
6697 bool IsOffloadEntry = true;
6698 if (IfCond) {
6699 bool Val;
6700 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6701 IsOffloadEntry = false;
6702 }
6703 if (CGM.getLangOpts().OMPTargetTriples.empty())
6704 IsOffloadEntry = false;
6705
6706 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6707 unsigned DiagID = CGM.getDiags().getCustomDiagID(
6708 DiagnosticsEngine::Error,
6709 "No offloading entry generated while offloading is mandatory.");
6710 CGM.getDiags().Report(DiagID);
6711 }
6712
6713 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6714 StringRef ParentName;
6715 // In case we have Ctors/Dtors we use the complete type variant to produce
6716 // the mangling of the device outlined kernel.
6717 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
6718 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
6719 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
6720 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
6721 else
6722 ParentName =
6723 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6724
6725 // Emit target region as a standalone region.
6726 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
6727 IsOffloadEntry, CodeGen);
6728 OMPLexicalScope Scope(CGF, S, OMPD_task);
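// For combined 'target' loop directives, emit the loop trip count so the
// offloading runtime can make use of it (e.g. to choose launch bounds); this
// is only needed for real offload entries.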
6729 auto &&SizeEmitter =
6730 [IsOffloadEntry](CodeGenFunction &CGF,
6731 const OMPLoopDirective &D) -> llvm::Value * {
6732 if (IsOffloadEntry) {
6733 OMPLoopScope(CGF, D);
6734 // Emit the calculation of the iteration count.
6735 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
6736 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6737 /*isSigned=*/false);
6738 return NumIterations;
6739 }
6740 return nullptr;
6741 };
6742 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6743 SizeEmitter);
6744 }
6745
6746 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6747 PrePostActionTy &Action) {
6748 Action.Enter(CGF);
6749 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6750 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6751 CGF.EmitOMPPrivateClause(S, PrivateScope);
6752 (void)PrivateScope.Privatize();
6753 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6754 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6755
6756 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6757 CGF.EnsureInsertPoint();
6758 }
6759
6760 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6761 StringRef ParentName,
6762 const OMPTargetDirective &S) {
6763 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6764 emitTargetRegion(CGF, S, Action);
6765 };
6766 llvm::Function *Fn;
6767 llvm::Constant *Addr;
6768 // Emit target region as a standalone region.
6769 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6770 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6771 assert(Fn && Addr && "Target device function emission failed.");
6772 }
6773
6774 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6775 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6776 emitTargetRegion(CGF, S, Action);
6777 };
6778 emitCommonOMPTargetDirective(*this, S, CodeGen);
6779 }
6780
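// Common codegen for 'teams' directives. For example, with
//   #pragma omp teams num_teams(4) thread_limit(64)
// the clause values are forwarded to the runtime before the outlined teams
// function is invoked.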
6781 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6782 const OMPExecutableDirective &S,
6783 OpenMPDirectiveKind InnermostKind,
6784 const RegionCodeGenTy &CodeGen) {
6785 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6786 llvm::Function *OutlinedFn =
6787 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6788 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
6789 CodeGen);
6790
6791 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6792 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6793 if (NT || TL) {
6794 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6795 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6796
6797 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6798 S.getBeginLoc());
6799 }
6800
6801 OMPTeamsScope Scope(CGF, S);
6802 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6803 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6804 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6805 CapturedVars);
6806 }
6807
6808 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6809 // Emit teams region as a standalone region.
6810 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6811 Action.Enter(CGF);
6812 OMPPrivateScope PrivateScope(CGF);
6813 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6814 CGF.EmitOMPPrivateClause(S, PrivateScope);
6815 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6816 (void)PrivateScope.Privatize();
6817 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6818 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6819 };
6820 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6821 emitPostUpdateForReductionClause(*this, S,
6822 [](CodeGenFunction &) { return nullptr; });
6823 }
6824
6825 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6826 const OMPTargetTeamsDirective &S) {
6827 auto *CS = S.getCapturedStmt(OMPD_teams);
6828 Action.Enter(CGF);
6829 // Emit teams region as a standalone region.
6830 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6831 Action.Enter(CGF);
6832 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6833 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6834 CGF.EmitOMPPrivateClause(S, PrivateScope);
6835 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6836 (void)PrivateScope.Privatize();
6837 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6838 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6839 CGF.EmitStmt(CS->getCapturedStmt());
6840 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6841 };
6842 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6843 emitPostUpdateForReductionClause(CGF, S,
6844 [](CodeGenFunction &) { return nullptr; });
6845 }
6846
6847 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6848 CodeGenModule &CGM, StringRef ParentName,
6849 const OMPTargetTeamsDirective &S) {
6850 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6851 emitTargetTeamsRegion(CGF, Action, S);
6852 };
6853 llvm::Function *Fn;
6854 llvm::Constant *Addr;
6855 // Emit target region as a standalone region.
6856 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6857 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6858 assert(Fn && Addr && "Target device function emission failed.");
6859 }
6860
6861 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6862 const OMPTargetTeamsDirective &S) {
6863 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6864 emitTargetTeamsRegion(CGF, Action, S);
6865 };
6866 emitCommonOMPTargetDirective(*this, S, CodeGen);
6867 }
6868
6869 static void
6870 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6871 const OMPTargetTeamsDistributeDirective &S) {
6872 Action.Enter(CGF);
6873 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6874 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6875 };
6876
6877 // Emit teams region as a standalone region.
6878 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6879 PrePostActionTy &Action) {
6880 Action.Enter(CGF);
6881 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6882 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6883 (void)PrivateScope.Privatize();
6884 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6885 CodeGenDistribute);
6886 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6887 };
6888 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6889 emitPostUpdateForReductionClause(CGF, S,
6890 [](CodeGenFunction &) { return nullptr; });
6891 }
6892
6893 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6894 CodeGenModule &CGM, StringRef ParentName,
6895 const OMPTargetTeamsDistributeDirective &S) {
6896 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6897 emitTargetTeamsDistributeRegion(CGF, Action, S);
6898 };
6899 llvm::Function *Fn;
6900 llvm::Constant *Addr;
6901 // Emit target region as a standalone region.
6902 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6903 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6904 assert(Fn && Addr && "Target device function emission failed.");
6905 }
6906
6907 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6908 const OMPTargetTeamsDistributeDirective &S) {
6909 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6910 emitTargetTeamsDistributeRegion(CGF, Action, S);
6911 };
6912 emitCommonOMPTargetDirective(*this, S, CodeGen);
6913 }
6914
6915 static void emitTargetTeamsDistributeSimdRegion(
6916 CodeGenFunction &CGF, PrePostActionTy &Action,
6917 const OMPTargetTeamsDistributeSimdDirective &S) {
6918 Action.Enter(CGF);
6919 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6920 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6921 };
6922
6923 // Emit teams region as a standalone region.
6924 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6925 PrePostActionTy &Action) {
6926 Action.Enter(CGF);
6927 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6928 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6929 (void)PrivateScope.Privatize();
6930 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6931 CodeGenDistribute);
6932 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6933 };
6934 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6935 emitPostUpdateForReductionClause(CGF, S,
6936 [](CodeGenFunction &) { return nullptr; });
6937 }
6938
6939 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6940 CodeGenModule &CGM, StringRef ParentName,
6941 const OMPTargetTeamsDistributeSimdDirective &S) {
6942 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6943 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6944 };
6945 llvm::Function *Fn;
6946 llvm::Constant *Addr;
6947 // Emit target region as a standalone region.
6948 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6949 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6950 assert(Fn && Addr && "Target device function emission failed.");
6951 }
6952
6953 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6954 const OMPTargetTeamsDistributeSimdDirective &S) {
6955 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6956 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6957 };
6958 emitCommonOMPTargetDirective(*this, S, CodeGen);
6959 }
6960
6961 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6962 const OMPTeamsDistributeDirective &S) {
6963
6964 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6965 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6966 };
6967
6968 // Emit teams region as a standalone region.
6969 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6970 PrePostActionTy &Action) {
6971 Action.Enter(CGF);
6972 OMPPrivateScope PrivateScope(CGF);
6973 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6974 (void)PrivateScope.Privatize();
6975 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6976 CodeGenDistribute);
6977 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6978 };
6979 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6980 emitPostUpdateForReductionClause(*this, S,
6981 [](CodeGenFunction &) { return nullptr; });
6982 }
6983
6984 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6985 const OMPTeamsDistributeSimdDirective &S) {
6986 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6987 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6988 };
6989
6990 // Emit teams region as a standalone region.
6991 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6992 PrePostActionTy &Action) {
6993 Action.Enter(CGF);
6994 OMPPrivateScope PrivateScope(CGF);
6995 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6996 (void)PrivateScope.Privatize();
6997 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6998 CodeGenDistribute);
6999 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7000 };
7001 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
7002 emitPostUpdateForReductionClause(*this, S,
7003 [](CodeGenFunction &) { return nullptr; });
7004 }
7005
7006 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
7007 const OMPTeamsDistributeParallelForDirective &S) {
7008 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7009 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7010 S.getDistInc());
7011 };
7012
7013 // Emit teams region as a standalone region.
7014 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7015 PrePostActionTy &Action) {
7016 Action.Enter(CGF);
7017 OMPPrivateScope PrivateScope(CGF);
7018 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7019 (void)PrivateScope.Privatize();
7020 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7021 CodeGenDistribute);
7022 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7023 };
7024 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
7025 emitPostUpdateForReductionClause(*this, S,
7026 [](CodeGenFunction &) { return nullptr; });
7027 }
7028
7029 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7030 const OMPTeamsDistributeParallelForSimdDirective &S) {
7031 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7032 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7033 S.getDistInc());
7034 };
7035
7036 // Emit teams region as a standalone region.
7037 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7038 PrePostActionTy &Action) {
7039 Action.Enter(CGF);
7040 OMPPrivateScope PrivateScope(CGF);
7041 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7042 (void)PrivateScope.Privatize();
7043 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7044 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7045 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7046 };
7047 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
7048 CodeGen);
7049 emitPostUpdateForReductionClause(*this, S,
7050 [](CodeGenFunction &) { return nullptr; });
7051 }
7052
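// e.g. '#pragma omp interop init(targetsync : obj) device(1) depend(in : x)'
// lowers to the corresponding OMPIRBuilder interop init/destroy/use calls,
// with any 'depend' clauses materialized as a dependence array first.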
7053 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
7054 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7055 llvm::Value *Device = nullptr;
7056 llvm::Value *NumDependences = nullptr;
7057 llvm::Value *DependenceList = nullptr;
7058
7059 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7060 Device = EmitScalarExpr(C->getDevice());
7061
7062 // Build the dependence list and emit the dependences.
7063 OMPTaskDataTy Data;
7064 buildDependences(S, Data);
7065 if (!Data.Dependences.empty()) {
7066 Address DependenciesArray = Address::invalid();
7067 std::tie(NumDependences, DependenciesArray) =
7068 CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
7069 S.getBeginLoc());
7070 DependenceList = DependenciesArray.emitRawPointer(*this);
7071 }
7072 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7073
7074 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
7075 S.getSingleClause<OMPDestroyClause>() ||
7076 S.getSingleClause<OMPUseClause>())) &&
7077 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
7078
7079 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7080 if (!ItOMPInitClause.empty()) {
7081 // Process each of the (possibly multiple) 'init' clauses.
7082 for (const OMPInitClause *C : ItOMPInitClause) {
7083 llvm::Value *InteropvarPtr =
7084 EmitLValue(C->getInteropVar()).getPointer(*this);
7085 llvm::omp::OMPInteropType InteropType =
7086 llvm::omp::OMPInteropType::Unknown;
7087 if (C->getIsTarget()) {
7088 InteropType = llvm::omp::OMPInteropType::Target;
7089 } else {
7090 assert(C->getIsTargetSync() &&
7091 "Expected interop-type target/targetsync");
7092 InteropType = llvm::omp::OMPInteropType::TargetSync;
7093 }
7094 OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
7095 Device, NumDependences, DependenceList,
7096 Data.HasNowaitClause);
7097 }
7098 }
7099 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7100 if (!ItOMPDestroyClause.empty()) {
7101 // Process each of the (possibly multiple) 'destroy' clauses.
7102 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7103 llvm::Value *InteropvarPtr =
7104 EmitLValue(C->getInteropVar()).getPointer(*this);
7105 OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
7106 NumDependences, DependenceList,
7107 Data.HasNowaitClause);
7108 }
7109 }
7110 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7111 if (!ItOMPUseClause.empty()) {
7112 // Process each of the (possibly multiple) 'use' clauses.
7113 for (const OMPUseClause *C : ItOMPUseClause) {
7114 llvm::Value *InteropvarPtr =
7115 EmitLValue(C->getInteropVar()).getPointer(*this);
7116 OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
7117 NumDependences, DependenceList,
7118 Data.HasNowaitClause);
7119 }
7120 }
7121 }
7122
7123 static void emitTargetTeamsDistributeParallelForRegion(
7124 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7125 PrePostActionTy &Action) {
7126 Action.Enter(CGF);
7127 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7128 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7129 S.getDistInc());
7130 };
7131
7132 // Emit teams region as a standalone region.
7133 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7134 PrePostActionTy &Action) {
7135 Action.Enter(CGF);
7136 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7137 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7138 (void)PrivateScope.Privatize();
7139 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7140 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7141 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7142 };
7143
7144 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7145 CodeGenTeams);
7146 emitPostUpdateForReductionClause(CGF, S,
7147 [](CodeGenFunction &) { return nullptr; });
7148 }
7149
7150 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7151 CodeGenModule &CGM, StringRef ParentName,
7152 const OMPTargetTeamsDistributeParallelForDirective &S) {
7153 // Emit SPMD target teams distribute parallel for region as a standalone
7154 // region.
7155 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7156 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7157 };
7158 llvm::Function *Fn;
7159 llvm::Constant *Addr;
7160 // Emit target region as a standalone region.
7161 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7162 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7163 assert(Fn && Addr && "Target device function emission failed.");
7164 }
7165
7166 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7167 const OMPTargetTeamsDistributeParallelForDirective &S) {
7168 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7169 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7170 };
7171 emitCommonOMPTargetDirective(*this, S, CodeGen);
7172 }
7173
7174 static void emitTargetTeamsDistributeParallelForSimdRegion(
7175 CodeGenFunction &CGF,
7176 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7177 PrePostActionTy &Action) {
7178 Action.Enter(CGF);
7179 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7180 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7181 S.getDistInc());
7182 };
7183
7184 // Emit teams region as a standalone region.
7185 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7186 PrePostActionTy &Action) {
7187 Action.Enter(CGF);
7188 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7189 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7190 (void)PrivateScope.Privatize();
7191 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7192 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7193 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7194 };
7195
7196 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
7197 CodeGenTeams);
7198 emitPostUpdateForReductionClause(CGF, S,
7199 [](CodeGenFunction &) { return nullptr; });
7200 }
7201
7202 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7203 CodeGenModule &CGM, StringRef ParentName,
7204 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7205 // Emit SPMD target teams distribute parallel for simd region as a standalone
7206 // region.
7207 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7208 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7209 };
7210 llvm::Function *Fn;
7211 llvm::Constant *Addr;
7212 // Emit target region as a standalone region.
7213 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7214 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7215 assert(Fn && Addr && "Target device function emission failed.");
7216 }
7217
7218 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7219 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7220 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7221 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7222 };
7223 emitCommonOMPTargetDirective(*this, S, CodeGen);
7224 }
7225
7226 void CodeGenFunction::EmitOMPCancellationPointDirective(
7227 const OMPCancellationPointDirective &S) {
7228 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7229 S.getCancelRegion());
7230 }
7231
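// e.g. '#pragma omp cancel for if(done)': cancellation takes effect only
// when the 'if' condition (if present) evaluates to true.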
7232 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7233 const Expr *IfCond = nullptr;
7234 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7235 if (C->getNameModifier() == OMPD_unknown ||
7236 C->getNameModifier() == OMPD_cancel) {
7237 IfCond = C->getCondition();
7238 break;
7239 }
7240 }
7241 if (CGM.getLangOpts().OpenMPIRBuilder) {
7242 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7243 // TODO: This check is necessary as we only generate `omp parallel` through
7244 // the OpenMPIRBuilder for now.
7245 if (S.getCancelRegion() == OMPD_parallel ||
7246 S.getCancelRegion() == OMPD_sections ||
7247 S.getCancelRegion() == OMPD_section) {
7248 llvm::Value *IfCondition = nullptr;
7249 if (IfCond)
7250 IfCondition = EmitScalarExpr(IfCond,
7251 /*IgnoreResultAssign=*/true);
7252 return Builder.restoreIP(
7253 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7254 }
7255 }
7256
7257 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7258 S.getCancelRegion());
7259 }
7260
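// Cancellation of 'parallel'- and task-based regions unwinds to the end of
// the outlined function (ReturnBlock); cancellation inside worksharing loops
// and sections branches to the exit block tracked by OMPCancelStack.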
7261 CodeGenFunction::JumpDest
7262 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7263 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7264 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7265 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7266 return ReturnBlock;
7267 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7268 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7269 Kind == OMPD_distribute_parallel_for ||
7270 Kind == OMPD_target_parallel_for ||
7271 Kind == OMPD_teams_distribute_parallel_for ||
7272 Kind == OMPD_target_teams_distribute_parallel_for);
7273 return OMPCancelStack.getExitBlock();
7274 }
7275
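// e.g. for '#pragma omp target data map(buf[0:n]) use_device_ptr(buf)', each
// listed pointer is remapped inside the region to the device address recorded
// in CaptureDeviceAddrMap.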
7276 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7277 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7278 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7279 CaptureDeviceAddrMap) {
7280 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7281 for (const Expr *OrigVarIt : C.varlists()) {
7282 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7283 if (!Processed.insert(OrigVD).second)
7284 continue;
7285
7286 // In order to identify the right initializer we need to match the
7287 // declaration used by the mapping logic. In some cases we may get an
7288 // OMPCapturedExprDecl that refers to the original declaration.
7289 const ValueDecl *MatchingVD = OrigVD;
7290 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7291 // OMPCapturedExprDecls are used to privatize fields of the current
7292 // structure.
7293 const auto *ME = cast<MemberExpr>(OED->getInit());
7294 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7295 "Base should be the current struct!");
7296 MatchingVD = ME->getMemberDecl();
7297 }
7298
7299 // If we don't have information about the current list item, move on to
7300 // the next one.
7301 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7302 if (InitAddrIt == CaptureDeviceAddrMap.end())
7303 continue;
7304
7305 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7306
7307 // Register the device address as the private copy of the variable.
7308 bool IsRegistered = PrivateScope.addPrivate(
7309 OrigVD,
7310 Address(InitAddrIt->second, Ty,
7311 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7312 assert(IsRegistered && "firstprivate var already registered as private");
7313 // Silence the warning about unused variable.
7314 (void)IsRegistered;
7315 }
7316 }
7317
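// Strip array sections and subscripts to find the underlying variable;
// e.g. for 'use_device_addr(a[0:n])' or 'a[i][j]' this returns 'a'.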
7318 static const VarDecl *getBaseDecl(const Expr *Ref) {
7319 const Expr *Base = Ref->IgnoreParenImpCasts();
7320 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
7321 Base = OASE->getBase()->IgnoreParenImpCasts();
7322 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7323 Base = ASE->getBase()->IgnoreParenImpCasts();
7324 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7325 }
7326
7327 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7328 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7329 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7330 CaptureDeviceAddrMap) {
7331 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7332 for (const Expr *Ref : C.varlists()) {
7333 const VarDecl *OrigVD = getBaseDecl(Ref);
7334 if (!Processed.insert(OrigVD).second)
7335 continue;
7336 // In order to identify the right initializer we need to match the
7337 // declaration used by the mapping logic. In some cases we may get an
7338 // OMPCapturedExprDecl that refers to the original declaration.
7339 const ValueDecl *MatchingVD = OrigVD;
7340 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7341 // OMPCapturedExprDecls are used to privatize fields of the current
7342 // structure.
7343 const auto *ME = cast<MemberExpr>(OED->getInit());
7344 assert(isa<CXXThisExpr>(ME->getBase()) &&
7345 "Base should be the current struct!");
7346 MatchingVD = ME->getMemberDecl();
7347 }
7348
7349 // If we don't have information about the current list item, move on to
7350 // the next one.
7351 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7352 if (InitAddrIt == CaptureDeviceAddrMap.end())
7353 continue;
7354
7355 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7356
7357 Address PrivAddr =
7358 Address(InitAddrIt->second, Ty,
7359 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
7360 // For declrefs and variable-length arrays we need to load the pointer for
7361 // correct mapping, since the pointer to the data was passed to the runtime.
7362 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7363 MatchingVD->getType()->isArrayType()) {
7364 QualType PtrTy = getContext().getPointerType(
7365 OrigVD->getType().getNonReferenceType());
7366 PrivAddr =
7367 EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
7368 PtrTy->castAs<PointerType>());
7369 }
7370
7371 (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7372 }
7373 }
7374
7375 // Generate the instructions for '#pragma omp target data' directive.
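// For example:
//   #pragma omp target data map(tofrom: a[0:n]) use_device_ptr(p)
//   { ... }
// emits paired begin/end mapping calls around the region, with the
// use_device_ptr/use_device_addr privatization handled in between.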
7376 void CodeGenFunction::EmitOMPTargetDataDirective(
7377 const OMPTargetDataDirective &S) {
7378 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7379 /*SeparateBeginEndCalls=*/true);
7380
7381 // Create a pre/post action to signal the privatization of the device pointer.
7382 // This action can be replaced by the OpenMP runtime code generation to
7383 // deactivate privatization.
7384 bool PrivatizeDevicePointers = false;
7385 class DevicePointerPrivActionTy : public PrePostActionTy {
7386 bool &PrivatizeDevicePointers;
7387
7388 public:
7389 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7390 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7391 void Enter(CodeGenFunction &CGF) override {
7392 PrivatizeDevicePointers = true;
7393 }
7394 };
7395 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7396
7397 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7398 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7399 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7400 };
7401
7402 // Codegen that selects whether to generate the privatization code or not.
7403 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7404 RegionCodeGenTy RCG(InnermostCodeGen);
7405 PrivatizeDevicePointers = false;
7406
7407 // Call the pre-action to change the status of PrivatizeDevicePointers if
7408 // needed.
7409 Action.Enter(CGF);
7410
7411 if (PrivatizeDevicePointers) {
7412 OMPPrivateScope PrivateScope(CGF);
7413 // Emit all instances of the use_device_ptr clause.
7414 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7415 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7416 Info.CaptureDeviceAddrMap);
7417 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7418 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7419 Info.CaptureDeviceAddrMap);
7420 (void)PrivateScope.Privatize();
7421 RCG(CGF);
7422 } else {
7423 // If we don't have target devices, don't bother emitting the data
7424 // mapping code.
7425 std::optional<OpenMPDirectiveKind> CaptureRegion;
7426 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7427 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7428 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7429 for (const Expr *E : C->varlists()) {
7430 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
7431 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7432 CGF.EmitVarDecl(*OED);
7433 }
7434 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7435 for (const Expr *E : C->varlists()) {
7436 const Decl *D = getBaseDecl(E);
7437 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7438 CGF.EmitVarDecl(*OED);
7439 }
7440 } else {
7441 CaptureRegion = OMPD_unknown;
7442 }
7443
7444 OMPLexicalScope Scope(CGF, S, CaptureRegion);
7445 RCG(CGF);
7446 }
7447 };
7448
7449 // Forward the provided action to the privatization codegen.
7450 RegionCodeGenTy PrivRCG(PrivCodeGen);
7451 PrivRCG.setAction(Action);
7452
7453 // Even though the body of the region is emitted as an inlined directive,
7454 // we don't use an inline scope: changes to the references inside the
7455 // region are expected to be visible outside, so we do not privatize them.
7456 OMPLexicalScope Scope(CGF, S);
7457 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
7458 PrivRCG);
7459 };
7460
7461 RegionCodeGenTy RCG(CodeGen);
7462
7463 // If we don't have target devices, don't bother emitting the data mapping
7464 // code.
7465 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7466 RCG(*this);
7467 return;
7468 }
7469
7470 // Check if we have any if clause associated with the directive.
7471 const Expr *IfCond = nullptr;
7472 if (const auto *C = S.getSingleClause<OMPIfClause>())
7473 IfCond = C->getCondition();
7474
7475 // Check if we have any device clause associated with the directive.
7476 const Expr *Device = nullptr;
7477 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7478 Device = C->getDevice();
7479
7480 // Set the action to signal privatization of device pointers.
7481 RCG.setAction(PrivAction);
7482
7483 // Emit region code.
7484 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
7485 Info);
7486 }
7487
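// 'target enter data' and 'target exit data' are standalone directives; they
// only emit the mapping runtime call, e.g.
//   #pragma omp target enter data map(to: a[0:n]) device(d) if(c)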
7488 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7489 const OMPTargetEnterDataDirective &S) {
7490 // If we don't have target devices, don't bother emitting the data mapping
7491 // code.
7492 if (CGM.getLangOpts().OMPTargetTriples.empty())
7493 return;
7494
7495 // Check if we have any if clause associated with the directive.
7496 const Expr *IfCond = nullptr;
7497 if (const auto *C = S.getSingleClause<OMPIfClause>())
7498 IfCond = C->getCondition();
7499
7500 // Check if we have any device clause associated with the directive.
7501 const Expr *Device = nullptr;
7502 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7503 Device = C->getDevice();
7504
7505 OMPLexicalScope Scope(*this, S, OMPD_task);
7506 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7507 }
7508
7509 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7510 const OMPTargetExitDataDirective &S) {
7511 // If we don't have target devices, don't bother emitting the data mapping
7512 // code.
7513 if (CGM.getLangOpts().OMPTargetTriples.empty())
7514 return;
7515
7516 // Check if we have any if clause associated with the directive.
7517 const Expr *IfCond = nullptr;
7518 if (const auto *C = S.getSingleClause<OMPIfClause>())
7519 IfCond = C->getCondition();
7520
7521 // Check if we have any device clause associated with the directive.
7522 const Expr *Device = nullptr;
7523 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7524 Device = C->getDevice();
7525
7526 OMPLexicalScope Scope(*this, S, OMPD_task);
7527 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7528 }
7529
7530 static void emitTargetParallelRegion(CodeGenFunction &CGF,
7531 const OMPTargetParallelDirective &S,
7532 PrePostActionTy &Action) {
7533 // Get the captured statement associated with the 'parallel' region.
7534 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
7535 Action.Enter(CGF);
7536 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7537 Action.Enter(CGF);
7538 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7539 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7540 CGF.EmitOMPPrivateClause(S, PrivateScope);
7541 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7542 (void)PrivateScope.Privatize();
7543 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7544 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
7545 // TODO: Add support for clauses.
7546 CGF.EmitStmt(CS->getCapturedStmt());
7547 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
7548 };
7549 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
7550 emitEmptyBoundParameters);
7551 emitPostUpdateForReductionClause(CGF, S,
7552 [](CodeGenFunction &) { return nullptr; });
7553 }
7554
7555 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7556 CodeGenModule &CGM, StringRef ParentName,
7557 const OMPTargetParallelDirective &S) {
7558 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7559 emitTargetParallelRegion(CGF, S, Action);
7560 };
7561 llvm::Function *Fn;
7562 llvm::Constant *Addr;
7563 // Emit target region as a standalone region.
7564 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7565 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7566 assert(Fn && Addr && "Target device function emission failed.");
7567 }
7568
7569 void CodeGenFunction::EmitOMPTargetParallelDirective(
7570 const OMPTargetParallelDirective &S) {
7571 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7572 emitTargetParallelRegion(CGF, S, Action);
7573 };
7574 emitCommonOMPTargetDirective(*this, S, CodeGen);
7575 }
7576
7577 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7578 const OMPTargetParallelForDirective &S,
7579 PrePostActionTy &Action) {
7580 Action.Enter(CGF);
7581 // Emit the directive as a combined construct made of two implicit
7582 // directives: 'parallel' with a nested 'for' directive.
7583 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7584 Action.Enter(CGF);
7585 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7586 CGF, OMPD_target_parallel_for, S.hasCancel());
7587 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7588 emitDispatchForLoopBounds);
7589 };
7590 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7591 emitEmptyBoundParameters);
7592 }
7593
7594 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7595 CodeGenModule &CGM, StringRef ParentName,
7596 const OMPTargetParallelForDirective &S) {
7597 // Emit SPMD target parallel for region as a standalone region.
7598 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7599 emitTargetParallelForRegion(CGF, S, Action);
7600 };
7601 llvm::Function *Fn;
7602 llvm::Constant *Addr;
7603 // Emit target region as a standalone region.
7604 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7605 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7606 assert(Fn && Addr && "Target device function emission failed.");
7607 }
7608
7609 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7610 const OMPTargetParallelForDirective &S) {
7611 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7612 emitTargetParallelForRegion(CGF, S, Action);
7613 };
7614 emitCommonOMPTargetDirective(*this, S, CodeGen);
7615 }
7616
7617 static void
7618 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7619 const OMPTargetParallelForSimdDirective &S,
7620 PrePostActionTy &Action) {
7621 Action.Enter(CGF);
7622 // Emit the directive as a combined construct made of two implicit
7623 // directives: 'parallel' with a nested 'for' directive.
7624 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7625 Action.Enter(CGF);
7626 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7627 emitDispatchForLoopBounds);
7628 };
7629 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7630 emitEmptyBoundParameters);
7631 }
7632
7633 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7634 CodeGenModule &CGM, StringRef ParentName,
7635 const OMPTargetParallelForSimdDirective &S) {
7636 // Emit SPMD target parallel for region as a standalone region.
7637 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7638 emitTargetParallelForSimdRegion(CGF, S, Action);
7639 };
7640 llvm::Function *Fn;
7641 llvm::Constant *Addr;
7642 // Emit target region as a standalone region.
7643 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7644 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7645 assert(Fn && Addr && "Target device function emission failed.");
7646 }
7647
7648 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7649 const OMPTargetParallelForSimdDirective &S) {
7650 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7651 emitTargetParallelForSimdRegion(CGF, S, Action);
7652 };
7653 emitCommonOMPTargetDirective(*this, S, CodeGen);
7654 }
7655
7656 /// Emit a helper variable and return corresponding lvalue.
7657 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7658 const ImplicitParamDecl *PVD,
7659 CodeGenFunction::OMPPrivateScope &Privates) {
7660 const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7661 Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7662 }
7663
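// Shared codegen for 'taskloop'-based directives. For example,
//   #pragma omp taskloop grainsize(4)
// and
//   #pragma omp taskloop num_tasks(8)
// are distinguished below by the int/pointer pair stored in Data.Schedule.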
7664 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7665 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7666 // Emit outlined function for task construct.
7667 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7668 Address CapturedStruct = Address::invalid();
7669 {
7670 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7671 CapturedStruct = GenerateCapturedStmtArgument(*CS);
7672 }
7673 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7674 const Expr *IfCond = nullptr;
7675 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7676 if (C->getNameModifier() == OMPD_unknown ||
7677 C->getNameModifier() == OMPD_taskloop) {
7678 IfCond = C->getCondition();
7679 break;
7680 }
7681 }
7682
7683 OMPTaskDataTy Data;
7684 // Check if taskloop must be emitted without taskgroup.
7685 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7686 // TODO: Check if we should emit tied or untied task.
7687 Data.Tied = true;
7688 // Set scheduling for taskloop
7689 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7690 // grainsize clause
7691 Data.Schedule.setInt(/*IntVal=*/false);
7692 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7693 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7694 // num_tasks clause
7695 Data.Schedule.setInt(/*IntVal=*/true);
7696 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7697 }
7698
7699 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7700 // if (PreCond) {
7701 // for (IV in 0..LastIteration) BODY;
7702 // <Final counter/linear vars updates>;
7703 // }
7704 //
7705
7706 // Emit: if (PreCond) - begin.
7707 // If the condition constant folds and can be elided, avoid emitting the
7708 // whole loop.
7709 bool CondConstant;
7710 llvm::BasicBlock *ContBlock = nullptr;
7711 OMPLoopScope PreInitScope(CGF, S);
7712 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7713 if (!CondConstant)
7714 return;
7715 } else {
7716 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7717 ContBlock = CGF.createBasicBlock("taskloop.if.end");
7718 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7719 CGF.getProfileCount(&S));
7720 CGF.EmitBlock(ThenBlock);
7721 CGF.incrementProfileCounter(&S);
7722 }
7723
7724 (void)CGF.EmitOMPLinearClauseInit(S);
7725
7726 OMPPrivateScope LoopScope(CGF);
7727 // Emit helper vars inits.
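// The loop-bound helpers (lower bound, upper bound, stride, last-iteration
// flag) are passed as the 5th..8th parameters of the captured function; the
// enum below names those parameter indices.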
7728 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7729 auto *I = CS->getCapturedDecl()->param_begin();
7730 auto *LBP = std::next(I, LowerBound);
7731 auto *UBP = std::next(I, UpperBound);
7732 auto *STP = std::next(I, Stride);
7733 auto *LIP = std::next(I, LastIter);
7734 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7735 LoopScope);
7736 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7737 LoopScope);
7738 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7739 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7740 LoopScope);
7741 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7742 CGF.EmitOMPLinearClause(S, LoopScope);
7743 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7744 (void)LoopScope.Privatize();
7745 // Emit the loop iteration variable.
7746 const Expr *IVExpr = S.getIterationVariable();
7747 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7748 CGF.EmitVarDecl(*IVDecl);
7749 CGF.EmitIgnoredExpr(S.getInit());
7750
7751 // Emit the iteration count variable.
7752 // If it is not a variable, Sema decided to calculate the iteration count
7753 // on each iteration (e.g., it is foldable into a constant).
7754 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7755 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7756 // Emit the calculation of the iteration count.
7757 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7758 }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

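// Illustrative source handled by the taskloop-based path above (an example,
// not taken from this file; work() is a placeholder):
//
//   #pragma omp taskloop grainsize(4) lastprivate(last)
//   for (int i = 0; i < n; ++i)
//     last = work(i);
//
// BodyGen emits the precondition check and the inner loop, TaskGen emits the
// runtime taskloop call, and unless the 'nogroup' clause was present the
// whole construct is wrapped in an implicit taskgroup region (Data.Nogroup
// dispatch above).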
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

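// Illustrative example for the next function (not from this file; body() is
// a placeholder):
//
//   #pragma omp master taskloop
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// The construct is emitted as a taskloop nested inside a 'master' region,
// i.e. only the master thread of the encountering team generates the tasks.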
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

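// Illustrative example for the combined construct below (not from this file;
// body() is a placeholder):
//
//   #pragma omp parallel master taskloop
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// A parallel region is opened first, and the taskloop is then emitted inside
// a master region within it (see emitCommonOMPParallelDirective below).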
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

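// A standalone data-motion example handled by the next function (an
// illustration, not from this file; 'a', 'n', and 'dev' are placeholders):
//
//   #pragma omp target update from(a[0:n]) if(n > 0) device(dev)
//
// When no target triples are configured the directive is a no-op; otherwise
// a runtime data-update call is emitted, with the 'if' and 'device' clause
// expressions passed through to the runtime call emission.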
// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

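// Illustrative 'loop' directive lowered by the next function (not from this
// file; body() is a placeholder):
//
//   #pragma omp loop
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// Since this construct has no dedicated lowering yet, the associated
// statement is emitted inline, privatizing the loop counters when the loop
// init does not declare them itself.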
void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    const Stmt *CS =
        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
    // If the loop init does not declare the counters itself, privatize the
    // loop counters before inlining the statement.
    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}

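// Illustrative 'parallel loop' lowering handled by the next function (not
// from this file; body() is a placeholder): the combined directive
//
//   #pragma omp parallel loop
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// is emitted as if it were 'parallel for' (see the CodeGen lambda below).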
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

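// Illustrative 'teams loop' lowering handled by the next function (not from
// this file; body() is a placeholder):
//
//   #pragma omp teams loop
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// is emitted as if it were 'teams distribute', matching the current
// 'target teams loop' behavior described in the comment below.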
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and
  // 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

#ifndef NDEBUG
static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
                                             std::string StatusMsg,
                                             const OMPExecutableDirective &D) {
  bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
  if (IsDevice)
    StatusMsg += ": DEVICE";
  else
    StatusMsg += ": HOST";
  SourceLocation L = D.getBeginLoc();
  auto &SM = CGF.getContext().getSourceManager();
  PresumedLoc PLoc = SM.getPresumedLoc(L);
  // Fall back to a non-null name: streaming a null 'const char *' into a
  // raw_ostream is undefined behavior.
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : "<unknown>";
  unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
}
#endif

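// 'target teams loop' has two lowering strategies, chosen per directive via
// OMPTargetTeamsGenericLoopDirective::canBeParallelFor() (see
// EmitOMPTargetTeamsGenericLoopDirective below). For an illustrative
//
//   #pragma omp target teams loop
//   for (int i = 0; i < n; ++i)
//     body(i);                     // body() is a placeholder
//
// the region is emitted either as 'distribute parallel for' (when analysis
// determined the loop can be run that way) or, conservatively, as plain
// 'distribute'.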
static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit the target teams loop region as a standalone region, using whichever
  // lowering strategy was selected for the directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}

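// Illustrative 'target parallel loop' lowering handled by the helper below
// (not from this file; body() is a placeholder):
//
//   #pragma omp target parallel loop
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// is emitted as if it were 'target parallel for' (a worksharing loop inside
// a parallel region within the target region).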
static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Emit target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

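// The next function emits a directive as an inlined region, without
// outlining. One illustrative case (an example, not from this file; body()
// is a placeholder) is a tasking directive that names a global in a
// firstprivate clause:
//
//   int g;                         // a global
//   #pragma omp taskloop firstprivate(g)
//   for (int i = 0; i < n; ++i)
//     body(i, g);
//
// The CodeGen lambda below captures such globals into GlobalsScope so later
// lookups find a local mapping instead of crashing.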
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast here: cast<> asserts on a mismatch, which would make
          // the null check below dead.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}