//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};
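
// Illustrative note (added; not in the original source): a clause operand
// such as
//   #pragma omp parallel num_threads(N + 1)
// is captured into a pre-init declaration by Sema; the scopes above emit
// those pre-init declarations so the captured expression is evaluated once,
// before the construct's region is entered.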
/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible in the
      // statements that follow them, unpack the CompoundStmt they are nested
      // in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
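        // Illustrative example (added note): for a loop-transformation
        // directive such as '#pragma omp tile sizes(4)', the pre-inits
        // typically declare temporaries that hold precomputed trip counts
        // for the generated loop nest; they must be emitted before the
        // transformed loops themselves.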
        // EmitStmt skips any OMPCapturedExprDecls, but they need to be
        // emitted here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions must not be emitted, as they are
    // not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ?
          Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
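// Added note (not in the original source): for a construct such as
//   int n = 10;
//   #pragma omp parallel firstprivate(n)
// a scalar captured by copy is passed to the outlined helper as a
// uintptr-sized argument, because the runtime can only forward pointer-sized
// values; FunctionOptions below records whether that cast is required.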
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the
    // outlined function argument type should be uintptr and the value
    // properly cast to uintptr. This is necessary given that the runtime
    // library is only able to deal with pointers. VLA type sizes can be
    // passed to the outlined function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
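  // Added note: when debug info is requested, the body is emitted into a
  // "<helper>_debug__" function that keeps the original parameter types, and
  // a thin wrapper with the runtime-mandated (uintptr-based) signature is
  // emitted afterwards to call it.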
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
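  // Added note on the emitted IR shape: a guarded copy loop of the form
  //   isempty check -> omp.arraycpy.body (src/dest PHIs, CopyGen, advance
  //   both element pointers) -> omp.arraycpy.done
  // walking both arrays one element at a time.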
  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, as is the case for omp for, omp simd, omp distribute,
  // etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
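        // Added example (illustrative): under '#pragma omp parallel
        // private(a)', 'VD' is the fresh uninitialized copy of 'a'; it is
        // emitted here and registered below so references to 'a' inside the
        // region resolve to the private storage.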
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
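  // Added example (illustrative):
  //   int tp;
  //   #pragma omp threadprivate(tp)
  //   #pragma omp parallel copyin(tp)
  // Each non-master thread copies the master's value of 'tp' into its own
  // threadprivate instance before the parallel region body runs.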
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
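  // Added example (illustrative): for '#pragma omp for lastprivate(x)', the
  // guarded block emitted below copies the private 'x' back to the original
  // variable only on the thread that executed the sequentially last
  // iteration.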
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying back to the
        // original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool TeamsLoopCanBeParallel = false;
    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
      TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
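    // Added example (illustrative): for
    //   #pragma omp parallel for reduction(+ : sum)
    // each thread's private partial 'sum' is combined into the original
    // variable by the runtime reduction call emitted below.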
1466 CGM.getOpenMPRuntime().emitReduction(
1467 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1468 {WithNowait, SimpleReduction, ReductionKind});
1469 }
1470 }
1471
1472 static void emitPostUpdateForReductionClause(
1473 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1474 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1475 if (!CGF.HaveInsertPoint())
1476 return;
1477 llvm::BasicBlock *DoneBB = nullptr;
1478 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1479 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1480 if (!DoneBB) {
1481 if (llvm::Value *Cond = CondGen(CGF)) {
1482 // If the first post-update expression is found, emit the conditional
1483 // block if it was requested.
1484 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1485 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1486 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1487 CGF.EmitBlock(ThenBB);
1488 }
1489 }
1490 CGF.EmitIgnoredExpr(PostUpdate);
1491 }
1492 }
1493 if (DoneBB)
1494 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1495 }
1496
1497 namespace {
1498 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1499 /// parallel function. This is necessary for combined constructs such as
1500 /// 'distribute parallel for'.
1501 typedef llvm::function_ref<void(CodeGenFunction &,
1502 const OMPExecutableDirective &,
1503 llvm::SmallVectorImpl<llvm::Value *> &)>
1504 CodeGenBoundParametersTy;
1505 } // anonymous namespace
1506
1507 static void
1508 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1509 const OMPExecutableDirective &S) {
1510 if (CGF.getLangOpts().OpenMP < 50)
1511 return;
1512 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1513 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1514 for (const Expr *Ref : C->varlists()) {
1515 if (!Ref->getType()->isScalarType())
1516 continue;
1517 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1518 if (!DRE)
1519 continue;
1520 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1521 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1522 }
1523 }
1524 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1525 for (const Expr *Ref : C->varlists()) {
1526 if (!Ref->getType()->isScalarType())
1527 continue;
1528 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1529 if (!DRE)
1530 continue;
1531 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1532 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1533 }
1534 }
1535 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1536 for (const Expr *Ref : C->varlists()) {
1537 if (!Ref->getType()->isScalarType())
1538 continue;
1539 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1540 if (!DRE)
1541 continue;
1542 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1543 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1544 }
1545 }
1546 // Privates should not be analyzed since they are not captured at all.
1547 // Task reductions may be skipped - tasks are ignored.
1548 // Firstprivates do not return a value but may be passed by reference - no
1549 // need to check for updated lastprivate conditional.
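// For example, given '#pragma omp parallel for lastprivate(conditional: x)'
// (an OpenMP 5.0 feature), every store to 'x' in the region has to be
// tracked so that the value of the lexically last updated iteration is
// copied out; the loops above and below only collect the decls relevant to
// that tracking.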
1550 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1551 for (const Expr *Ref : C->varlists()) {
1552 if (!Ref->getType()->isScalarType())
1553 continue;
1554 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1555 if (!DRE)
1556 continue;
1557 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1558 }
1559 }
1560 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1561 CGF, S, PrivateDecls);
1562 }
1563
1564 static void emitCommonOMPParallelDirective(
1565 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1566 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1567 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1568 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1569 llvm::Value *NumThreads = nullptr;
1570 llvm::Function *OutlinedFn =
1571 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1572 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1573 CodeGen);
1574 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1575 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1576 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1577 /*IgnoreResultAssign=*/true);
1578 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1579 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1580 }
1581 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1582 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1583 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1584 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1585 }
1586 const Expr *IfCond = nullptr;
1587 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1588 if (C->getNameModifier() == OMPD_unknown ||
1589 C->getNameModifier() == OMPD_parallel) {
1590 IfCond = C->getCondition();
1591 break;
1592 }
1593 }
1594
1595 OMPParallelScope Scope(CGF, S);
1596 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1597 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1598 // lower and upper bounds with the pragma 'for' chunking mechanism.
1599 // The following lambda takes care of appending the lower and upper bound
1600 // parameters when necessary.
1601 CodeGenBoundParameters(CGF, S, CapturedVars);
1602 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1603 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1604 CapturedVars, IfCond, NumThreads);
1605 }
1606
1607 static bool isAllocatableDecl(const VarDecl *VD) {
1608 const VarDecl *CVD = VD->getCanonicalDecl();
1609 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1610 return false;
1611 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1612 // Use the default allocation.
1613 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1614 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1615 !AA->getAllocator());
1616 }
1617
1618 static void emitEmptyBoundParameters(CodeGenFunction &,
1619 const OMPExecutableDirective &,
1620 llvm::SmallVectorImpl<llvm::Value *> &) {}
1621
1622 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1623 const OMPExecutableDirective &S) {
1624 bool Copyins = CGF.EmitOMPCopyinClause(S);
1625 if (Copyins) {
1626 // Emit an implicit barrier to synchronize threads and avoid data races when
1627 // propagating the master thread's values of threadprivate variables to the
1628 // local instances of those variables in all other implicit threads.
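// Sketch of the effect: for '#pragma omp parallel copyin(tp)' each thread
// first copies the master thread's value of 'tp' into its own threadprivate
// instance, and the barrier emitted below (typically lowered to a
// __kmpc_barrier call) keeps threads from reading their instances before
// the propagation is complete.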
1629 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1630 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1631 /*ForceSimpleCall=*/true);
1632 }
1633 }
1634
1635 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1636 CodeGenFunction &CGF, const VarDecl *VD) {
1637 CodeGenModule &CGM = CGF.CGM;
1638 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1639
1640 if (!VD)
1641 return Address::invalid();
1642 const VarDecl *CVD = VD->getCanonicalDecl();
1643 if (!isAllocatableDecl(CVD))
1644 return Address::invalid();
1645 llvm::Value *Size;
1646 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1647 if (CVD->getType()->isVariablyModifiedType()) {
1648 Size = CGF.getTypeSize(CVD->getType());
1649 // Align the size: ((size + align - 1) / align) * align
1650 Size = CGF.Builder.CreateNUWAdd(
1651 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1652 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1653 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1654 } else {
1655 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1656 Size = CGM.getSize(Sz.alignTo(Align));
1657 }
1658
1659 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1660 assert(AA->getAllocator() &&
1661 "Expected allocator expression for non-default allocator.");
1662 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1663 // According to the standard, the original allocator type is an enum
1664 // (integer). Convert it to a pointer type, if required.
1665 if (Allocator->getType()->isIntegerTy())
1666 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1667 else if (Allocator->getType()->isPointerTy())
1668 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1669 CGM.VoidPtrTy);
1670
1671 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1672 CGF.Builder, Size, Allocator,
1673 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1674 llvm::CallInst *FreeCI =
1675 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1676
1677 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1678 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1679 Addr,
1680 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1681 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1682 return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
1683 }
1684
1685 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1686 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1687 SourceLocation Loc) {
1688 CodeGenModule &CGM = CGF.CGM;
1689 if (CGM.getLangOpts().OpenMPUseTLS &&
1690 CGM.getContext().getTargetInfo().isTLSSupported())
1691 return VDAddr;
1692
1693 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1694
1695 llvm::Type *VarTy = VDAddr.getElementType();
1696 llvm::Value *Data =
1697 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
1698 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1699 std::string Suffix = getNameWithSeparators({"cache", ""});
1700 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1701
1702 llvm::CallInst *ThreadPrivateCacheCall =
1703 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1704
1705 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1706 }
1707
1708 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1709 ArrayRef<StringRef> Parts,
StringRef FirstSeparator, StringRef Separator) { 1710 SmallString<128> Buffer; 1711 llvm::raw_svector_ostream OS(Buffer); 1712 StringRef Sep = FirstSeparator; 1713 for (StringRef Part : Parts) { 1714 OS << Sep << Part; 1715 Sep = Separator; 1716 } 1717 return OS.str().str(); 1718 } 1719 1720 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 1721 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, 1722 InsertPointTy CodeGenIP, Twine RegionName) { 1723 CGBuilderTy &Builder = CGF.Builder; 1724 Builder.restoreIP(CodeGenIP); 1725 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, 1726 "." + RegionName + ".after"); 1727 1728 { 1729 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); 1730 CGF.EmitStmt(RegionBodyStmt); 1731 } 1732 1733 if (Builder.saveIP().isSet()) 1734 Builder.CreateBr(FiniBB); 1735 } 1736 1737 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( 1738 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, 1739 InsertPointTy CodeGenIP, Twine RegionName) { 1740 CGBuilderTy &Builder = CGF.Builder; 1741 Builder.restoreIP(CodeGenIP); 1742 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, 1743 "." + RegionName + ".after"); 1744 1745 { 1746 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); 1747 CGF.EmitStmt(RegionBodyStmt); 1748 } 1749 1750 if (Builder.saveIP().isSet()) 1751 Builder.CreateBr(FiniBB); 1752 } 1753 1754 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1755 if (CGM.getLangOpts().OpenMPIRBuilder) { 1756 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1757 // Check if we have any if clause associated with the directive. 1758 llvm::Value *IfCond = nullptr; 1759 if (const auto *C = S.getSingleClause<OMPIfClause>()) 1760 IfCond = EmitScalarExpr(C->getCondition(), 1761 /*IgnoreResultAssign=*/true); 1762 1763 llvm::Value *NumThreads = nullptr; 1764 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) 1765 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), 1766 /*IgnoreResultAssign=*/true); 1767 1768 ProcBindKind ProcBind = OMP_PROC_BIND_default; 1769 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) 1770 ProcBind = ProcBindClause->getProcBindKind(); 1771 1772 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 1773 1774 // The cleanup callback that finalizes all variables at the given location, 1775 // thus calls destructors etc. 1776 auto FiniCB = [this](InsertPointTy IP) { 1777 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 1778 }; 1779 1780 // Privatization callback that performs appropriate action for 1781 // shared/private/firstprivate/lastprivate/copyin/... variables. 1782 // 1783 // TODO: This defaults to shared right now. 1784 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1785 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 1786 // The next line is appropriate only for variables (Val) with the 1787 // data-sharing attribute "shared". 
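// A privatizing clause would instead create a new allocation at AllocaIP
// (plus a copy of Val for firstprivate) and point ReplVal at it.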
1788 ReplVal = &Val; 1789 1790 return CodeGenIP; 1791 }; 1792 1793 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1794 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); 1795 1796 auto BodyGenCB = [&, this](InsertPointTy AllocaIP, 1797 InsertPointTy CodeGenIP) { 1798 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( 1799 *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); 1800 }; 1801 1802 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 1803 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 1804 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 1805 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 1806 Builder.restoreIP( 1807 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1808 IfCond, NumThreads, ProcBind, S.hasCancel())); 1809 return; 1810 } 1811 1812 // Emit parallel region as a standalone region. 1813 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1814 Action.Enter(CGF); 1815 OMPPrivateScope PrivateScope(CGF); 1816 emitOMPCopyinClause(CGF, S); 1817 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1818 CGF.EmitOMPPrivateClause(S, PrivateScope); 1819 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1820 (void)PrivateScope.Privatize(); 1821 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); 1822 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1823 }; 1824 { 1825 auto LPCRegion = 1826 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 1827 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1828 emitEmptyBoundParameters); 1829 emitPostUpdateForReductionClause(*this, S, 1830 [](CodeGenFunction &) { return nullptr; }); 1831 } 1832 // Check for outer lastprivate conditional update. 1833 checkForLastprivateConditionalUpdate(*this, S); 1834 } 1835 1836 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { 1837 EmitStmt(S.getIfStmt()); 1838 } 1839 1840 namespace { 1841 /// RAII to handle scopes for loop transformation directives. 1842 class OMPTransformDirectiveScopeRAII { 1843 OMPLoopScope *Scope = nullptr; 1844 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; 1845 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; 1846 1847 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = 1848 delete; 1849 OMPTransformDirectiveScopeRAII & 1850 operator=(const OMPTransformDirectiveScopeRAII &) = delete; 1851 1852 public: 1853 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { 1854 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { 1855 Scope = new OMPLoopScope(CGF, *Dir); 1856 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); 1857 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); 1858 } 1859 } 1860 ~OMPTransformDirectiveScopeRAII() { 1861 if (!Scope) 1862 return; 1863 delete CapInfoRAII; 1864 delete CGSI; 1865 delete Scope; 1866 } 1867 }; 1868 } // namespace 1869 1870 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, 1871 int MaxLevel, int Level = 0) { 1872 assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); 1873 const Stmt *SimplifiedS = S->IgnoreContainers(); 1874 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { 1875 PrettyStackTraceLoc CrashInfo( 1876 CGF.getContext().getSourceManager(), CS->getLBracLoc(), 1877 "LLVM IR generation of compound statement ('{}')"); 1878 1879 // Keep track of the current cleanup stack depth, including debug scopes. 
1880 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); 1881 for (const Stmt *CurStmt : CS->body()) 1882 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); 1883 return; 1884 } 1885 if (SimplifiedS == NextLoop) { 1886 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS)) 1887 SimplifiedS = Dir->getTransformedStmt(); 1888 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) 1889 SimplifiedS = CanonLoop->getLoopStmt(); 1890 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { 1891 S = For->getBody(); 1892 } else { 1893 assert(isa<CXXForRangeStmt>(SimplifiedS) && 1894 "Expected canonical for loop or range-based for loop."); 1895 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); 1896 CGF.EmitStmt(CXXFor->getLoopVarStmt()); 1897 S = CXXFor->getBody(); 1898 } 1899 if (Level + 1 < MaxLevel) { 1900 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( 1901 S, /*TryImperfectlyNestedLoops=*/true); 1902 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); 1903 return; 1904 } 1905 } 1906 CGF.EmitStmt(S); 1907 } 1908 1909 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1910 JumpDest LoopExit) { 1911 RunCleanupsScope BodyScope(*this); 1912 // Update counters values on current iteration. 1913 for (const Expr *UE : D.updates()) 1914 EmitIgnoredExpr(UE); 1915 // Update the linear variables. 1916 // In distribute directives only loop counters may be marked as linear, no 1917 // need to generate the code for them. 1918 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { 1919 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1920 for (const Expr *UE : C->updates()) 1921 EmitIgnoredExpr(UE); 1922 } 1923 } 1924 1925 // On a continue in the body, jump to the end. 1926 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); 1927 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1928 for (const Expr *E : D.finals_conditions()) { 1929 if (!E) 1930 continue; 1931 // Check that loop counter in non-rectangular nest fits into the iteration 1932 // space. 1933 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); 1934 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), 1935 getProfileCount(D.getBody())); 1936 EmitBlock(NextBB); 1937 } 1938 1939 OMPPrivateScope InscanScope(*this); 1940 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); 1941 bool IsInscanRegion = InscanScope.Privatize(); 1942 if (IsInscanRegion) { 1943 // Need to remember the block before and after scan directive 1944 // to dispatch them correctly depending on the clause used in 1945 // this directive, inclusive or exclusive. For inclusive scan the natural 1946 // order of the blocks is used, for exclusive clause the blocks must be 1947 // executed in reverse order. 1948 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); 1949 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); 1950 // No need to allocate inscan exit block, in simd mode it is selected in the 1951 // codegen for the scan directive. 1952 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) 1953 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); 1954 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); 1955 EmitBranch(OMPScanDispatch); 1956 EmitBlock(OMPBeforeScanBlock); 1957 } 1958 1959 // Emit loop variables for C++ range loops. 1960 const Stmt *Body = 1961 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1962 // Emit loop body. 
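// For example, with '#pragma omp for collapse(2)' this descends through both
// associated for-statements (following imperfectly nested loops as well) so
// that only the innermost body is emitted; the loop control itself is driven
// by the single logical iteration variable updated above.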
1963 emitBody(*this, Body, 1964 OMPLoopBasedDirective::tryToFindNextInnerLoop( 1965 Body, /*TryImperfectlyNestedLoops=*/true), 1966 D.getLoopsNumber()); 1967 1968 // Jump to the dispatcher at the end of the loop body. 1969 if (IsInscanRegion) 1970 EmitBranch(OMPScanExitBlock); 1971 1972 // The end (updates/cleanups). 1973 EmitBlock(Continue.getBlock()); 1974 BreakContinueStack.pop_back(); 1975 } 1976 1977 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; 1978 1979 /// Emit a captured statement and return the function as well as its captured 1980 /// closure context. 1981 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, 1982 const CapturedStmt *S) { 1983 LValue CapStruct = ParentCGF.InitCapturedStruct(*S); 1984 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); 1985 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = 1986 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S); 1987 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); 1988 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); 1989 1990 return {F, CapStruct.getPointer(ParentCGF)}; 1991 } 1992 1993 /// Emit a call to a previously captured closure. 1994 static llvm::CallInst * 1995 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, 1996 llvm::ArrayRef<llvm::Value *> Args) { 1997 // Append the closure context to the argument. 1998 SmallVector<llvm::Value *> EffectiveArgs; 1999 EffectiveArgs.reserve(Args.size() + 1); 2000 llvm::append_range(EffectiveArgs, Args); 2001 EffectiveArgs.push_back(Cap.second); 2002 2003 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); 2004 } 2005 2006 llvm::CanonicalLoopInfo * 2007 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { 2008 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); 2009 2010 // The caller is processing the loop-associated directive processing the \p 2011 // Depth loops nested in \p S. Put the previous pending loop-associated 2012 // directive to the stack. If the current loop-associated directive is a loop 2013 // transformation directive, it will push its generated loops onto the stack 2014 // such that together with the loops left here they form the combined loop 2015 // nest for the parent loop-associated directive. 2016 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; 2017 ExpectedOMPLoopDepth = Depth; 2018 2019 EmitStmt(S); 2020 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); 2021 2022 // The last added loop is the outermost one. 2023 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); 2024 2025 // Pop the \p Depth loops requested by the call from that stack and restore 2026 // the previous context. 2027 OMPLoopNestStack.pop_back_n(Depth); 2028 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; 2029 2030 return Result; 2031 } 2032 2033 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { 2034 const Stmt *SyntacticalLoop = S->getLoopStmt(); 2035 if (!getLangOpts().OpenMPIRBuilder) { 2036 // Ignore if OpenMPIRBuilder is not enabled. 2037 EmitStmt(SyntacticalLoop); 2038 return; 2039 } 2040 2041 LexicalScope ForScope(*this, S->getSourceRange()); 2042 2043 // Emit init statements. The Distance/LoopVar funcs may reference variable 2044 // declarations they contain. 
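// Rough model of the two helper closures used below: DistanceFunc yields the
// trip count, e.g. for 'for (i = a; i < b; i += c)' something like
// (b - a + (c - 1)) / c, and LoopVarFunc recomputes the user's loop variable
// from a logical induction value, e.g. i = a + logical * c.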
2045 const Stmt *BodyStmt; 2046 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { 2047 if (const Stmt *InitStmt = For->getInit()) 2048 EmitStmt(InitStmt); 2049 BodyStmt = For->getBody(); 2050 } else if (const auto *RangeFor = 2051 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { 2052 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) 2053 EmitStmt(RangeStmt); 2054 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) 2055 EmitStmt(BeginStmt); 2056 if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) 2057 EmitStmt(EndStmt); 2058 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) 2059 EmitStmt(LoopVarStmt); 2060 BodyStmt = RangeFor->getBody(); 2061 } else 2062 llvm_unreachable("Expected for-stmt or range-based for-stmt"); 2063 2064 // Emit closure for later use. By-value captures will be captured here. 2065 const CapturedStmt *DistanceFunc = S->getDistanceFunc(); 2066 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); 2067 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); 2068 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); 2069 2070 // Call the distance function to get the number of iterations of the loop to 2071 // come. 2072 QualType LogicalTy = DistanceFunc->getCapturedDecl() 2073 ->getParam(0) 2074 ->getType() 2075 .getNonReferenceType(); 2076 RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); 2077 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); 2078 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); 2079 2080 // Emit the loop structure. 2081 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2082 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, 2083 llvm::Value *IndVar) { 2084 Builder.restoreIP(CodeGenIP); 2085 2086 // Emit the loop body: Convert the logical iteration number to the loop 2087 // variable and emit the body. 2088 const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); 2089 LValue LCVal = EmitLValue(LoopVarRef); 2090 Address LoopVarAddress = LCVal.getAddress(); 2091 emitCapturedStmtCall(*this, LoopVarClosure, 2092 {LoopVarAddress.emitRawPointer(*this), IndVar}); 2093 2094 RunCleanupsScope BodyScope(*this); 2095 EmitStmt(BodyStmt); 2096 }; 2097 llvm::CanonicalLoopInfo *CL = 2098 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); 2099 2100 // Finish up the loop. 2101 Builder.restoreIP(CL->getAfterIP()); 2102 ForScope.ForceCleanup(); 2103 2104 // Remember the CanonicalLoopInfo for parent AST nodes consuming it. 2105 OMPLoopNestStack.push_back(CL); 2106 } 2107 2108 void CodeGenFunction::EmitOMPInnerLoop( 2109 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, 2110 const Expr *IncExpr, 2111 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 2112 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 2113 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 2114 2115 // Start the loop with a block that tests the condition. 2116 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 2117 EmitBlock(CondBlock); 2118 const SourceRange R = S.getSourceRange(); 2119 2120 // If attributes are attached, push to the basic block with them. 
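// For instance, a loop hint such as '#pragma clang loop vectorize_width(4)'
// on the associated loop is presumed to surface here as an AttributedStmt
// wrapping the captured statement; its attributes are forwarded to LoopStack
// so the hints end up in this loop's metadata.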
2121 const auto &OMPED = cast<OMPExecutableDirective>(S); 2122 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 2123 const Stmt *SS = ICS->getCapturedStmt(); 2124 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 2125 OMPLoopNestStack.clear(); 2126 if (AS) 2127 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 2128 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 2129 SourceLocToDebugLoc(R.getEnd())); 2130 else 2131 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2132 SourceLocToDebugLoc(R.getEnd())); 2133 2134 // If there are any cleanups between here and the loop-exit scope, 2135 // create a block to stage a loop exit along. 2136 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2137 if (RequiresCleanup) 2138 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 2139 2140 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 2141 2142 // Emit condition. 2143 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 2144 if (ExitBlock != LoopExit.getBlock()) { 2145 EmitBlock(ExitBlock); 2146 EmitBranchThroughCleanup(LoopExit); 2147 } 2148 2149 EmitBlock(LoopBody); 2150 incrementProfileCounter(&S); 2151 2152 // Create a block for the increment. 2153 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 2154 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2155 2156 BodyGen(*this); 2157 2158 // Emit "IV = IV + 1" and a back-edge to the condition block. 2159 EmitBlock(Continue.getBlock()); 2160 EmitIgnoredExpr(IncExpr); 2161 PostIncGen(*this); 2162 BreakContinueStack.pop_back(); 2163 EmitBranch(CondBlock); 2164 LoopStack.pop(); 2165 // Emit the fall-through block. 2166 EmitBlock(LoopExit.getBlock()); 2167 } 2168 2169 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 2170 if (!HaveInsertPoint()) 2171 return false; 2172 // Emit inits for the linear variables. 2173 bool HasLinears = false; 2174 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2175 for (const Expr *Init : C->inits()) { 2176 HasLinears = true; 2177 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 2178 if (const auto *Ref = 2179 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 2180 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 2181 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 2182 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2183 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2184 VD->getInit()->getType(), VK_LValue, 2185 VD->getInit()->getExprLoc()); 2186 EmitExprAsInit( 2187 &DRE, VD, 2188 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), 2189 /*capturedByInit=*/false); 2190 EmitAutoVarCleanups(Emission); 2191 } else { 2192 EmitVarDecl(*VD); 2193 } 2194 } 2195 // Emit the linear steps for the linear clauses. 2196 // If a step is not constant, it is pre-calculated before the loop. 2197 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 2198 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 2199 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 2200 // Emit calculation of the linear step. 2201 EmitIgnoredExpr(CS); 2202 } 2203 } 2204 return HasLinears; 2205 } 2206 2207 void CodeGenFunction::EmitOMPLinearClauseFinal( 2208 const OMPLoopDirective &D, 2209 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2210 if (!HaveInsertPoint()) 2211 return; 2212 llvm::BasicBlock *DoneBB = nullptr; 2213 // Emit the final values of the linear variables. 
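// Sketch of what each final expression F below does: for 'linear(x : step)'
// it is roughly 'x = start + niters * step'; mapping the original decl to
// its own address first makes that store target the original variable
// rather than the private copy.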
2214 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2215 auto IC = C->varlist_begin(); 2216 for (const Expr *F : C->finals()) { 2217 if (!DoneBB) { 2218 if (llvm::Value *Cond = CondGen(*this)) { 2219 // If the first post-update expression is found, emit conditional 2220 // block if it was requested. 2221 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 2222 DoneBB = createBasicBlock(".omp.linear.pu.done"); 2223 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2224 EmitBlock(ThenBB); 2225 } 2226 } 2227 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 2228 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2229 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2230 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 2231 Address OrigAddr = EmitLValue(&DRE).getAddress(); 2232 CodeGenFunction::OMPPrivateScope VarScope(*this); 2233 VarScope.addPrivate(OrigVD, OrigAddr); 2234 (void)VarScope.Privatize(); 2235 EmitIgnoredExpr(F); 2236 ++IC; 2237 } 2238 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 2239 EmitIgnoredExpr(PostUpdate); 2240 } 2241 if (DoneBB) 2242 EmitBlock(DoneBB, /*IsFinished=*/true); 2243 } 2244 2245 static void emitAlignedClause(CodeGenFunction &CGF, 2246 const OMPExecutableDirective &D) { 2247 if (!CGF.HaveInsertPoint()) 2248 return; 2249 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 2250 llvm::APInt ClauseAlignment(64, 0); 2251 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 2252 auto *AlignmentCI = 2253 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 2254 ClauseAlignment = AlignmentCI->getValue(); 2255 } 2256 for (const Expr *E : Clause->varlists()) { 2257 llvm::APInt Alignment(ClauseAlignment); 2258 if (Alignment == 0) { 2259 // OpenMP [2.8.1, Description] 2260 // If no optional parameter is specified, implementation-defined default 2261 // alignments for SIMD instructions on the target platforms are assumed. 2262 Alignment = 2263 CGF.getContext() 2264 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 2265 E->getType()->getPointeeType())) 2266 .getQuantity(); 2267 } 2268 assert((Alignment == 0 || Alignment.isPowerOf2()) && 2269 "alignment is not power of 2"); 2270 if (Alignment != 0) { 2271 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 2272 CGF.emitAlignmentAssumption( 2273 PtrValue, E, /*No second loc needed*/ SourceLocation(), 2274 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 2275 } 2276 } 2277 } 2278 } 2279 2280 void CodeGenFunction::EmitOMPPrivateLoopCounters( 2281 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 2282 if (!HaveInsertPoint()) 2283 return; 2284 auto I = S.private_counters().begin(); 2285 for (const Expr *E : S.counters()) { 2286 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2287 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2288 // Emit var without initialization. 
2289 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 2290 EmitAutoVarCleanups(VarEmission); 2291 LocalDeclMap.erase(PrivateVD); 2292 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress()); 2293 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 2294 VD->hasGlobalStorage()) { 2295 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 2296 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 2297 E->getType(), VK_LValue, E->getExprLoc()); 2298 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress()); 2299 } else { 2300 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress()); 2301 } 2302 ++I; 2303 } 2304 // Privatize extra loop counters used in loops for ordered(n) clauses. 2305 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 2306 if (!C->getNumForLoops()) 2307 continue; 2308 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); 2309 I < E; ++I) { 2310 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 2311 const auto *VD = cast<VarDecl>(DRE->getDecl()); 2312 // Override only those variables that can be captured to avoid re-emission 2313 // of the variables declared within the loops. 2314 if (DRE->refersToEnclosingVariableOrCapture()) { 2315 (void)LoopScope.addPrivate( 2316 VD, CreateMemTemp(DRE->getType(), VD->getName())); 2317 } 2318 } 2319 } 2320 } 2321 2322 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 2323 const Expr *Cond, llvm::BasicBlock *TrueBlock, 2324 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 2325 if (!CGF.HaveInsertPoint()) 2326 return; 2327 { 2328 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 2329 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 2330 (void)PreCondScope.Privatize(); 2331 // Get initial values of real counters. 2332 for (const Expr *I : S.inits()) { 2333 CGF.EmitIgnoredExpr(I); 2334 } 2335 } 2336 // Create temp loop control variables with their init values to support 2337 // non-rectangular loops. 2338 CodeGenFunction::OMPMapVars PreCondVars; 2339 for (const Expr *E : S.dependent_counters()) { 2340 if (!E) 2341 continue; 2342 assert(!E->getType().getNonReferenceType()->isRecordType() && 2343 "dependent counter must not be an iterator."); 2344 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2345 Address CounterAddr = 2346 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 2347 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 2348 } 2349 (void)PreCondVars.apply(CGF); 2350 for (const Expr *E : S.dependent_inits()) { 2351 if (!E) 2352 continue; 2353 CGF.EmitIgnoredExpr(E); 2354 } 2355 // Check that loop is executed at least one time. 
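// E.g. for 'for (i = 0; i < n; ++i)' the precondition is essentially
// '0 < n'; if it folds to false, the loop and its setup are skipped entirely.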
2356 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2357 PreCondVars.restore(CGF);
2358 }
2359
2360 void CodeGenFunction::EmitOMPLinearClause(
2361 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2362 if (!HaveInsertPoint())
2363 return;
2364 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2365 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2366 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2367 for (const Expr *C : LoopDirective->counters()) {
2368 SIMDLCVs.insert(
2369 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2370 }
2371 }
2372 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2373 auto CurPrivate = C->privates().begin();
2374 for (const Expr *E : C->varlists()) {
2375 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2376 const auto *PrivateVD =
2377 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2378 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2379 // Emit private VarDecl with copy init.
2380 EmitVarDecl(*PrivateVD);
2381 bool IsRegistered =
2382 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2383 assert(IsRegistered && "linear var already registered as private");
2384 // Silence the warning about unused variable.
2385 (void)IsRegistered;
2386 } else {
2387 EmitVarDecl(*PrivateVD);
2388 }
2389 ++CurPrivate;
2390 }
2391 }
2392 }
2393
2394 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2395 const OMPExecutableDirective &D) {
2396 if (!CGF.HaveInsertPoint())
2397 return;
2398 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2399 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2400 /*ignoreResult=*/true);
2401 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2402 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2403 // In the presence of a finite 'safelen', it may be unsafe to mark all
2404 // the memory instructions parallel, because loop-carried
2405 // dependences of up to 'safelen' iterations are possible.
2406 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2407 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2408 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2409 /*ignoreResult=*/true);
2410 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2411 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2412 // In the presence of a finite 'safelen', it may be unsafe to mark all
2413 // the memory instructions parallel, because loop-carried
2414 // dependences of up to 'safelen' iterations are possible.
2415 CGF.LoopStack.setParallel(/*Enable=*/false);
2416 }
2417 }
2418
2419 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2420 // Walk the clauses and process simdlen/safelen and order.
2421 LoopStack.setParallel(/*Enable=*/true);
2422 LoopStack.setVectorizeEnable();
2423 emitSimdlenSafelenClause(*this, D);
2424 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2425 if (C->getKind() == OMPC_ORDER_concurrent)
2426 LoopStack.setParallel(/*Enable=*/true);
2427 if ((D.getDirectiveKind() == OMPD_simd ||
2428 (getLangOpts().OpenMPSimd &&
2429 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2430 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2431 [](const OMPReductionClause *C) {
2432 return C->getModifier() == OMPC_REDUCTION_inscan;
2433 }))
2434 // Disable parallel access in case of prefix sum.
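// (E.g. 'reduction(inscan, + : x)', where the scan phases impose an
// in-order dependence between iterations.)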
2435 LoopStack.setParallel(/*Enable=*/false); 2436 } 2437 2438 void CodeGenFunction::EmitOMPSimdFinal( 2439 const OMPLoopDirective &D, 2440 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2441 if (!HaveInsertPoint()) 2442 return; 2443 llvm::BasicBlock *DoneBB = nullptr; 2444 auto IC = D.counters().begin(); 2445 auto IPC = D.private_counters().begin(); 2446 for (const Expr *F : D.finals()) { 2447 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 2448 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 2449 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 2450 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 2451 OrigVD->hasGlobalStorage() || CED) { 2452 if (!DoneBB) { 2453 if (llvm::Value *Cond = CondGen(*this)) { 2454 // If the first post-update expression is found, emit conditional 2455 // block if it was requested. 2456 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 2457 DoneBB = createBasicBlock(".omp.final.done"); 2458 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2459 EmitBlock(ThenBB); 2460 } 2461 } 2462 Address OrigAddr = Address::invalid(); 2463 if (CED) { 2464 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 2465 } else { 2466 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 2467 /*RefersToEnclosingVariableOrCapture=*/false, 2468 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 2469 OrigAddr = EmitLValue(&DRE).getAddress(); 2470 } 2471 OMPPrivateScope VarScope(*this); 2472 VarScope.addPrivate(OrigVD, OrigAddr); 2473 (void)VarScope.Privatize(); 2474 EmitIgnoredExpr(F); 2475 } 2476 ++IC; 2477 ++IPC; 2478 } 2479 if (DoneBB) 2480 EmitBlock(DoneBB, /*IsFinished=*/true); 2481 } 2482 2483 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 2484 const OMPLoopDirective &S, 2485 CodeGenFunction::JumpDest LoopExit) { 2486 CGF.EmitOMPLoopBody(S, LoopExit); 2487 CGF.EmitStopPoint(&S); 2488 } 2489 2490 /// Emit a helper variable and return corresponding lvalue. 
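/// Typically used for the implicit variables Sema builds for loop bounds
/// (presumably '.omp.lb', '.omp.ub' and friends), which only need storage
/// emitted before first use.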
2491 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2492 const DeclRefExpr *Helper) { 2493 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2494 CGF.EmitVarDecl(*VDecl); 2495 return CGF.EmitLValue(Helper); 2496 } 2497 2498 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2499 const RegionCodeGenTy &SimdInitGen, 2500 const RegionCodeGenTy &BodyCodeGen) { 2501 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2502 PrePostActionTy &) { 2503 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2504 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2505 SimdInitGen(CGF); 2506 2507 BodyCodeGen(CGF); 2508 }; 2509 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2510 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2511 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2512 2513 BodyCodeGen(CGF); 2514 }; 2515 const Expr *IfCond = nullptr; 2516 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2517 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2518 if (CGF.getLangOpts().OpenMP >= 50 && 2519 (C->getNameModifier() == OMPD_unknown || 2520 C->getNameModifier() == OMPD_simd)) { 2521 IfCond = C->getCondition(); 2522 break; 2523 } 2524 } 2525 } 2526 if (IfCond) { 2527 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2528 } else { 2529 RegionCodeGenTy ThenRCG(ThenGen); 2530 ThenRCG(CGF); 2531 } 2532 } 2533 2534 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2535 PrePostActionTy &Action) { 2536 Action.Enter(CGF); 2537 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 2538 "Expected simd directive"); 2539 OMPLoopScope PreInitScope(CGF, S); 2540 // if (PreCond) { 2541 // for (IV in 0..LastIteration) BODY; 2542 // <Final counter/linear vars updates>; 2543 // } 2544 // 2545 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 2546 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 2547 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 2548 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2549 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2550 } 2551 2552 // Emit: if (PreCond) - begin. 2553 // If the condition constant folds and can be elided, avoid emitting the 2554 // whole loop. 2555 bool CondConstant; 2556 llvm::BasicBlock *ContBlock = nullptr; 2557 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2558 if (!CondConstant) 2559 return; 2560 } else { 2561 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2562 ContBlock = CGF.createBasicBlock("simd.if.end"); 2563 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2564 CGF.getProfileCount(&S)); 2565 CGF.EmitBlock(ThenBlock); 2566 CGF.incrementProfileCounter(&S); 2567 } 2568 2569 // Emit the loop iteration variable. 2570 const Expr *IVExpr = S.getIterationVariable(); 2571 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2572 CGF.EmitVarDecl(*IVDecl); 2573 CGF.EmitIgnoredExpr(S.getInit()); 2574 2575 // Emit the iterations count variable. 2576 // If it is not a variable, Sema decided to calculate iterations count on 2577 // each iteration (e.g., it is foldable into a constant). 2578 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2579 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2580 // Emit calculation of the iterations count. 
2581 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2582 }
2583
2584 emitAlignedClause(CGF, S);
2585 (void)CGF.EmitOMPLinearClauseInit(S);
2586 {
2587 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2588 CGF.EmitOMPPrivateClause(S, LoopScope);
2589 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2590 CGF.EmitOMPLinearClause(S, LoopScope);
2591 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2592 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2593 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2594 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2595 (void)LoopScope.Privatize();
2596 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2597 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2598
2599 emitCommonSimdLoop(
2600 CGF, S,
2601 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2602 CGF.EmitOMPSimdInit(S);
2603 },
2604 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2605 CGF.EmitOMPInnerLoop(
2606 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2607 [&S](CodeGenFunction &CGF) {
2608 emitOMPLoopBodyWithStopPoint(CGF, S,
2609 CodeGenFunction::JumpDest());
2610 },
2611 [](CodeGenFunction &) {});
2612 });
2613 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2614 // Emit the final copy of the lastprivate variables at the end of loops.
2615 if (HasLastprivateClause)
2616 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2617 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2618 emitPostUpdateForReductionClause(CGF, S,
2619 [](CodeGenFunction &) { return nullptr; });
2620 LoopScope.restoreMap();
2621 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2622 }
2623 // Emit: if (PreCond) - end.
2624 if (ContBlock) {
2625 CGF.EmitBranch(ContBlock);
2626 CGF.EmitBlock(ContBlock, true);
2627 }
2628 }
2629
2630 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2631 // Check for unsupported clauses.
2632 for (OMPClause *C : S.clauses()) {
2633 // Currently only the aligned, order, simdlen and safelen clauses are
2634 // supported.
2634 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2635 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2636 return false;
2637 }
2638
2639 // Check if we have a statement with the ordered directive.
2640 // Visit the statement hierarchy to find a compound statement
2641 // with an ordered directive in it.
2642 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) { 2643 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { 2644 for (const Stmt *SubStmt : SyntacticalLoop->children()) { 2645 if (!SubStmt) 2646 continue; 2647 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) { 2648 for (const Stmt *CSSubStmt : CS->children()) { 2649 if (!CSSubStmt) 2650 continue; 2651 if (isa<OMPOrderedDirective>(CSSubStmt)) { 2652 return false; 2653 } 2654 } 2655 } 2656 } 2657 } 2658 } 2659 return true; 2660 } 2661 static llvm::MapVector<llvm::Value *, llvm::Value *> 2662 GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { 2663 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars; 2664 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { 2665 llvm::APInt ClauseAlignment(64, 0); 2666 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 2667 auto *AlignmentCI = 2668 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 2669 ClauseAlignment = AlignmentCI->getValue(); 2670 } 2671 for (const Expr *E : Clause->varlists()) { 2672 llvm::APInt Alignment(ClauseAlignment); 2673 if (Alignment == 0) { 2674 // OpenMP [2.8.1, Description] 2675 // If no optional parameter is specified, implementation-defined default 2676 // alignments for SIMD instructions on the target platforms are assumed. 2677 Alignment = 2678 CGF.getContext() 2679 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 2680 E->getType()->getPointeeType())) 2681 .getQuantity(); 2682 } 2683 assert((Alignment == 0 || Alignment.isPowerOf2()) && 2684 "alignment is not power of 2"); 2685 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 2686 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); 2687 } 2688 } 2689 return AlignedVars; 2690 } 2691 2692 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2693 bool UseOMPIRBuilder = 2694 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); 2695 if (UseOMPIRBuilder) { 2696 auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF, 2697 PrePostActionTy &) { 2698 // Use the OpenMPIRBuilder if enabled. 2699 if (UseOMPIRBuilder) { 2700 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars = 2701 GetAlignedMapping(S, CGF); 2702 // Emit the associated statement and get its loop representation. 
2703 const Stmt *Inner = S.getRawStmt(); 2704 llvm::CanonicalLoopInfo *CLI = 2705 EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2706 2707 llvm::OpenMPIRBuilder &OMPBuilder = 2708 CGM.getOpenMPRuntime().getOMPBuilder(); 2709 // Add SIMD specific metadata 2710 llvm::ConstantInt *Simdlen = nullptr; 2711 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { 2712 RValue Len = 2713 this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2714 /*ignoreResult=*/true); 2715 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2716 Simdlen = Val; 2717 } 2718 llvm::ConstantInt *Safelen = nullptr; 2719 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { 2720 RValue Len = 2721 this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2722 /*ignoreResult=*/true); 2723 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2724 Safelen = Val; 2725 } 2726 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; 2727 if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 2728 if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) { 2729 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; 2730 } 2731 } 2732 // Add simd metadata to the collapsed loop. Do not generate 2733 // another loop for if clause. Support for if clause is done earlier. 2734 OMPBuilder.applySimd(CLI, AlignedVars, 2735 /*IfCond*/ nullptr, Order, Simdlen, Safelen); 2736 return; 2737 } 2738 }; 2739 { 2740 auto LPCRegion = 2741 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2742 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2743 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, 2744 CodeGenIRBuilder); 2745 } 2746 return; 2747 } 2748 2749 ParentLoopDirectiveForScanRegion ScanRegion(*this, S); 2750 OMPFirstScanLoop = true; 2751 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2752 emitOMPSimdRegion(CGF, S, Action); 2753 }; 2754 { 2755 auto LPCRegion = 2756 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 2757 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2758 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2759 } 2760 // Check for outer lastprivate conditional update. 2761 checkForLastprivateConditionalUpdate(*this, S); 2762 } 2763 2764 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { 2765 // Emit the de-sugared statement. 2766 OMPTransformDirectiveScopeRAII TileScope(*this, &S); 2767 EmitStmt(S.getTransformedStmt()); 2768 } 2769 2770 void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { 2771 // Emit the de-sugared statement. 2772 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); 2773 EmitStmt(S.getTransformedStmt()); 2774 } 2775 2776 void CodeGenFunction::EmitOMPInterchangeDirective( 2777 const OMPInterchangeDirective &S) { 2778 // Emit the de-sugared statement. 2779 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S); 2780 EmitStmt(S.getTransformedStmt()); 2781 } 2782 2783 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { 2784 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; 2785 2786 if (UseOMPIRBuilder) { 2787 auto DL = SourceLocToDebugLoc(S.getBeginLoc()); 2788 const Stmt *Inner = S.getRawStmt(); 2789 2790 // Consume nested loop. Clear the entire remaining loop stack because a 2791 // fully unrolled loop is non-transformable. For partial unrolling the 2792 // generated outer loop is pushed back to the stack. 
2793 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2794 OMPLoopNestStack.clear(); 2795 2796 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2797 2798 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; 2799 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; 2800 2801 if (S.hasClausesOfKind<OMPFullClause>()) { 2802 assert(ExpectedOMPLoopDepth == 0); 2803 OMPBuilder.unrollLoopFull(DL, CLI); 2804 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2805 uint64_t Factor = 0; 2806 if (Expr *FactorExpr = PartialClause->getFactor()) { 2807 Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2808 assert(Factor >= 1 && "Only positive factors are valid"); 2809 } 2810 OMPBuilder.unrollLoopPartial(DL, CLI, Factor, 2811 NeedsUnrolledCLI ? &UnrolledCLI : nullptr); 2812 } else { 2813 OMPBuilder.unrollLoopHeuristic(DL, CLI); 2814 } 2815 2816 assert((!NeedsUnrolledCLI || UnrolledCLI) && 2817 "NeedsUnrolledCLI implies UnrolledCLI to be set"); 2818 if (UnrolledCLI) 2819 OMPLoopNestStack.push_back(UnrolledCLI); 2820 2821 return; 2822 } 2823 2824 // This function is only called if the unrolled loop is not consumed by any 2825 // other loop-associated construct. Such a loop-associated construct will have 2826 // used the transformed AST. 2827 2828 // Set the unroll metadata for the next emitted loop. 2829 LoopStack.setUnrollState(LoopAttributes::Enable); 2830 2831 if (S.hasClausesOfKind<OMPFullClause>()) { 2832 LoopStack.setUnrollState(LoopAttributes::Full); 2833 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2834 if (Expr *FactorExpr = PartialClause->getFactor()) { 2835 uint64_t Factor = 2836 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2837 assert(Factor >= 1 && "Only positive factors are valid"); 2838 LoopStack.setUnrollCount(Factor); 2839 } 2840 } 2841 2842 EmitStmt(S.getAssociatedStmt()); 2843 } 2844 2845 void CodeGenFunction::EmitOMPOuterLoop( 2846 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2847 CodeGenFunction::OMPPrivateScope &LoopScope, 2848 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2849 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2850 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2851 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2852 2853 const Expr *IVExpr = S.getIterationVariable(); 2854 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2855 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2856 2857 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2858 2859 // Start the loop with a block that tests the condition. 2860 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2861 EmitBlock(CondBlock); 2862 const SourceRange R = S.getSourceRange(); 2863 OMPLoopNestStack.clear(); 2864 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2865 SourceLocToDebugLoc(R.getEnd())); 2866 2867 llvm::Value *BoolCondVal = nullptr; 2868 if (!DynamicOrOrdered) { 2869 // UB = min(UB, GlobalUB) or 2870 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 
2871 // 'distribute parallel for') 2872 EmitIgnoredExpr(LoopArgs.EUB); 2873 // IV = LB 2874 EmitIgnoredExpr(LoopArgs.Init); 2875 // IV < UB 2876 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2877 } else { 2878 BoolCondVal = 2879 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2880 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2881 } 2882 2883 // If there are any cleanups between here and the loop-exit scope, 2884 // create a block to stage a loop exit along. 2885 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2886 if (LoopScope.requiresCleanups()) 2887 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2888 2889 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2890 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2891 if (ExitBlock != LoopExit.getBlock()) { 2892 EmitBlock(ExitBlock); 2893 EmitBranchThroughCleanup(LoopExit); 2894 } 2895 EmitBlock(LoopBody); 2896 2897 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2898 // LB for loop condition and emitted it above). 2899 if (DynamicOrOrdered) 2900 EmitIgnoredExpr(LoopArgs.Init); 2901 2902 // Create a block for the increment. 2903 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2904 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2905 2906 emitCommonSimdLoop( 2907 *this, S, 2908 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 2909 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2910 // with dynamic/guided scheduling and without ordered clause. 2911 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2912 CGF.LoopStack.setParallel(!IsMonotonic); 2913 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2914 if (C->getKind() == OMPC_ORDER_concurrent) 2915 CGF.LoopStack.setParallel(/*Enable=*/true); 2916 } else { 2917 CGF.EmitOMPSimdInit(S); 2918 } 2919 }, 2920 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2921 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2922 SourceLocation Loc = S.getBeginLoc(); 2923 // when 'distribute' is not combined with a 'for': 2924 // while (idx <= UB) { BODY; ++idx; } 2925 // when 'distribute' is combined with a 'for' 2926 // (e.g. 'distribute parallel for') 2927 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2928 CGF.EmitOMPInnerLoop( 2929 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2930 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2931 CodeGenLoop(CGF, S, LoopExit); 2932 }, 2933 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2934 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2935 }); 2936 }); 2937 2938 EmitBlock(Continue.getBlock()); 2939 BreakContinueStack.pop_back(); 2940 if (!DynamicOrOrdered) { 2941 // Emit "LB = LB + Stride", "UB = UB + Stride". 2942 EmitIgnoredExpr(LoopArgs.NextLB); 2943 EmitIgnoredExpr(LoopArgs.NextUB); 2944 } 2945 2946 EmitBranch(CondBlock); 2947 OMPLoopNestStack.clear(); 2948 LoopStack.pop(); 2949 // Emit the fall-through block. 2950 EmitBlock(LoopExit.getBlock()); 2951 2952 // Tell the runtime we are done. 
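// For statically scheduled loops this typically becomes a call to
// __kmpc_for_static_fini; dynamically scheduled loops have no per-loop
// static-finish call, hence the guard inside the lambda below.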
2953 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) { 2954 if (!DynamicOrOrdered) 2955 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2956 LoopArgs.DKind); 2957 }; 2958 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2959 } 2960 2961 void CodeGenFunction::EmitOMPForOuterLoop( 2962 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2963 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2964 const OMPLoopArguments &LoopArgs, 2965 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2966 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2967 2968 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2969 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); 2970 2971 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, 2972 LoopArgs.Chunk != nullptr)) && 2973 "static non-chunked schedule does not need outer loop"); 2974 2975 // Emit outer loop. 2976 // 2977 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2978 // When schedule(dynamic,chunk_size) is specified, the iterations are 2979 // distributed to threads in the team in chunks as the threads request them. 2980 // Each thread executes a chunk of iterations, then requests another chunk, 2981 // until no chunks remain to be distributed. Each chunk contains chunk_size 2982 // iterations, except for the last chunk to be distributed, which may have 2983 // fewer iterations. When no chunk_size is specified, it defaults to 1. 2984 // 2985 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2986 // to threads in the team in chunks as the executing threads request them. 2987 // Each thread executes a chunk of iterations, then requests another chunk, 2988 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2989 // each chunk is proportional to the number of unassigned iterations divided 2990 // by the number of threads in the team, decreasing to 1. For a chunk_size 2991 // with value k (greater than 1), the size of each chunk is determined in the 2992 // same way, with the restriction that the chunks do not contain fewer than k 2993 // iterations (except for the last chunk to be assigned, which may have fewer 2994 // than k iterations). 2995 // 2996 // When schedule(auto) is specified, the decision regarding scheduling is 2997 // delegated to the compiler and/or runtime system. The programmer gives the 2998 // implementation the freedom to choose any possible mapping of iterations to 2999 // threads in the team. 3000 // 3001 // When schedule(runtime) is specified, the decision regarding scheduling is 3002 // deferred until run time, and the schedule and chunk size are taken from the 3003 // run-sched-var ICV. If the ICV is set to auto, the schedule is 3004 // implementation defined 3005 // 3006 // __kmpc_dispatch_init(); 3007 // while(__kmpc_dispatch_next(&LB, &UB)) { 3008 // idx = LB; 3009 // while (idx <= UB) { BODY; ++idx; 3010 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 3011 // } // inner loop 3012 // } 3013 // __kmpc_dispatch_deinit(); 3014 // 3015 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3016 // When schedule(static, chunk_size) is specified, iterations are divided into 3017 // chunks of size chunk_size, and the chunks are assigned to the threads in 3018 // the team in a round-robin fashion in the order of the thread number. 
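// For example (illustrative only): with schedule(static, 2), 8 iterations
// and 2 threads, the chunks {0,1}, {2,3}, {4,5} and {6,7} are handed out
// round-robin, so thread 0 executes iterations {0,1,4,5} and thread 1
// executes {2,3,6,7}.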
3019 //
3020 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
3021 // while (idx <= UB) { BODY; ++idx; } // inner loop
3022 // LB = LB + ST;
3023 // UB = UB + ST;
3024 // }
3025 //
3026
3027 const Expr *IVExpr = S.getIterationVariable();
3028 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3029 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3030
3031 if (DynamicOrOrdered) {
3032 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3033 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3034 llvm::Value *LBVal = DispatchBounds.first;
3035 llvm::Value *UBVal = DispatchBounds.second;
3036 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
3037 LoopArgs.Chunk};
3038 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
3039 IVSigned, Ordered, DispatchRTInputValues);
3040 } else {
3041 CGOpenMPRuntime::StaticRTInput StaticInit(
3042 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3043 LoopArgs.ST, LoopArgs.Chunk);
3044 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3045 ScheduleKind, StaticInit);
3046 }
3047
3048 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3049 const unsigned IVSize,
3050 const bool IVSigned) {
3051 if (Ordered) {
3052 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3053 IVSigned);
3054 }
3055 };
3056
3057 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3058 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3059 OuterLoopArgs.IncExpr = S.getInc();
3060 OuterLoopArgs.Init = S.getInit();
3061 OuterLoopArgs.Cond = S.getCond();
3062 OuterLoopArgs.NextLB = S.getNextLowerBound();
3063 OuterLoopArgs.NextUB = S.getNextUpperBound();
3064 OuterLoopArgs.DKind = LoopArgs.DKind;
3065 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3066 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3067 if (DynamicOrOrdered) {
3068 RT.emitForDispatchDeinit(*this, S.getBeginLoc());
3069 }
3070 }
3071
3072 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3073 const unsigned IVSize, const bool IVSigned) {}
3074
3075 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3076 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3077 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3078 const CodeGenLoopTy &CodeGenLoopContent) {
3079
3080 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3081
3082 // Emit outer loop.
3083 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
3084 // dynamic.
3085 //
3086
3087 const Expr *IVExpr = S.getIterationVariable();
3088 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3089 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3090
3091 CGOpenMPRuntime::StaticRTInput StaticInit(
3092 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3093 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3094 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3095
3096 // For combined 'distribute' and 'for' the increment expression of distribute
3097 // is stored in DistInc. For 'distribute' alone, it is in Inc.
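// Illustrative sketch only: for '#pragma omp distribute parallel for' the
// distribute loop steps over whole chunks (DistInc is roughly 'IV += ST'),
// while a standalone 'distribute' advances one iteration at a time (Inc is
// roughly '++IV').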
3098 Expr *IncExpr;
3099 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3100 IncExpr = S.getDistInc();
3101 else
3102 IncExpr = S.getInc();
3103
3104 // This routine is shared by 'omp distribute parallel for' and
3105 // 'omp distribute': select the right EUB expression depending on the
3106 // directive.
3107 OMPLoopArguments OuterLoopArgs;
3108 OuterLoopArgs.LB = LoopArgs.LB;
3109 OuterLoopArgs.UB = LoopArgs.UB;
3110 OuterLoopArgs.ST = LoopArgs.ST;
3111 OuterLoopArgs.IL = LoopArgs.IL;
3112 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3113 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3114 ? S.getCombinedEnsureUpperBound()
3115 : S.getEnsureUpperBound();
3116 OuterLoopArgs.IncExpr = IncExpr;
3117 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3118 ? S.getCombinedInit()
3119 : S.getInit();
3120 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3121 ? S.getCombinedCond()
3122 : S.getCond();
3123 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3124 ? S.getCombinedNextLowerBound()
3125 : S.getNextLowerBound();
3126 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3127 ? S.getCombinedNextUpperBound()
3128 : S.getNextUpperBound();
3129 OuterLoopArgs.DKind = OMPD_distribute;
3130
3131 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3132 LoopScope, OuterLoopArgs, CodeGenLoopContent,
3133 emitEmptyOrdered);
3134 }
3135
3136 static std::pair<LValue, LValue>
3137 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3138 const OMPExecutableDirective &S) {
3139 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3140 LValue LB =
3141 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3142 LValue UB =
3143 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3144
3145 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3146 // parallel for') we need to use the 'distribute'
3147 // chunk lower and upper bounds rather than the whole loop iteration
3148 // space. These are parameters to the outlined function for 'parallel'
3149 // and we copy the bounds of the previous schedule into
3150 // the current ones.
3151 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3152 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3153 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3154 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3155 PrevLBVal = CGF.EmitScalarConversion(
3156 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3157 LS.getIterationVariable()->getType(),
3158 LS.getPrevLowerBoundVariable()->getExprLoc());
3159 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3160 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3161 PrevUBVal = CGF.EmitScalarConversion(
3162 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3163 LS.getIterationVariable()->getType(),
3164 LS.getPrevUpperBoundVariable()->getExprLoc());
3165
3166 CGF.EmitStoreOfScalar(PrevLBVal, LB);
3167 CGF.EmitStoreOfScalar(PrevUBVal, UB);
3168
3169 return {LB, UB};
3170 }
3171
3172 /// If the 'for' loop has a dispatch schedule (e.g.
dynamic, guided) then
3173 /// we need to use the LB and UB expressions generated by the worksharing
3174 /// code generation support, whereas in non-combined situations we would
3175 /// just emit 0 and the LastIteration expression.
3176 /// This function is necessary due to the difference of the LB and UB
3177 /// types for the RT emission routines for 'for_static_init' and
3178 /// 'for_dispatch_init'.
3179 static std::pair<llvm::Value *, llvm::Value *>
3180 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3181 const OMPExecutableDirective &S,
3182 Address LB, Address UB) {
3183 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3184 const Expr *IVExpr = LS.getIterationVariable();
3185 // When implementing a dynamic schedule for a 'for' combined with a
3186 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3187 // is not normalized as each team only executes its own assigned
3188 // distribute chunk.
3189 QualType IteratorTy = IVExpr->getType();
3190 llvm::Value *LBVal =
3191 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3192 llvm::Value *UBVal =
3193 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3194 return {LBVal, UBVal};
3195 }
3196
3197 static void emitDistributeParallelForDistributeInnerBoundParams(
3198 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3199 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3200 const auto &Dir = cast<OMPLoopDirective>(S);
3201 LValue LB =
3202 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3203 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3204 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
3205 CapturedVars.push_back(LBCast);
3206 LValue UB =
3207 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3208
3209 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3210 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
3211 CapturedVars.push_back(UBCast);
3212 }
3213
3214 static void
3215 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3216 const OMPLoopDirective &S,
3217 CodeGenFunction::JumpDest LoopExit) {
3218 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3219 PrePostActionTy &Action) {
3220 Action.Enter(CGF);
3221 bool HasCancel = false;
3222 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3223 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3224 HasCancel = D->hasCancel();
3225 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3226 HasCancel = D->hasCancel();
3227 else if (const auto *D =
3228 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3229 HasCancel = D->hasCancel();
3230 }
3231 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3232 HasCancel);
3233 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3234 emitDistributeParallelForInnerBounds,
3235 emitDistributeParallelForDispatchBounds);
3236 };
3237
3238 emitCommonOMPParallelDirective(
3239 CGF, S,
3240 isOpenMPSimdDirective(S.getDirectiveKind()) ?
OMPD_for_simd : OMPD_for,
3241 CGInlinedWorksharingLoop,
3242 emitDistributeParallelForDistributeInnerBoundParams);
3243 }
3244
3245 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3246 const OMPDistributeParallelForDirective &S) {
3247 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3248 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3249 S.getDistInc());
3250 };
3251 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3252 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3253 }
3254
3255 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3256 const OMPDistributeParallelForSimdDirective &S) {
3257 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3258 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3259 S.getDistInc());
3260 };
3261 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3262 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3263 }
3264
3265 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3266 const OMPDistributeSimdDirective &S) {
3267 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3268 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3269 };
3270 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3271 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3272 }
3273
3274 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3275 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3276 // Emit SPMD target simd region as a standalone region.
3277 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3278 emitOMPSimdRegion(CGF, S, Action);
3279 };
3280 llvm::Function *Fn;
3281 llvm::Constant *Addr;
3282 // Emit target region as a standalone region.
3283 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3284 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3285 assert(Fn && Addr && "Target device function emission failed.");
3286 }
3287
3288 void CodeGenFunction::EmitOMPTargetSimdDirective(
3289 const OMPTargetSimdDirective &S) {
3290 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3291 emitOMPSimdRegion(CGF, S, Action);
3292 };
3293 emitCommonOMPTargetDirective(*this, S, CodeGen);
3294 }
3295
3296 namespace {
3297 struct ScheduleKindModifiersTy {
3298 OpenMPScheduleClauseKind Kind;
3299 OpenMPScheduleClauseModifier M1;
3300 OpenMPScheduleClauseModifier M2;
3301 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3302 OpenMPScheduleClauseModifier M1,
3303 OpenMPScheduleClauseModifier M2)
3304 : Kind(Kind), M1(M1), M2(M2) {}
3305 };
3306 } // namespace
3307
3308 bool CodeGenFunction::EmitOMPWorksharingLoop(
3309 const OMPLoopDirective &S, Expr *EUB,
3310 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3311 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3312 // Emit the loop iteration variable.
3313 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3314 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3315 EmitVarDecl(*IVDecl);
3316
3317 // Emit the iterations count variable.
3318 // If it is not a variable, Sema decided to calculate the iterations count on
3319 // each iteration (e.g., it is foldable into a constant).
3320 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3321 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3322 // Emit calculation of the iterations count.
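// Illustrative example (assumed shape, not emitted verbatim): for
// 'for (int i = 0; i < N; i += 2)' the precomputed count is roughly
// '(N - 0 + 2 - 1) / 2', i.e. ceil(N / 2).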
3323 EmitIgnoredExpr(S.getCalcLastIteration()); 3324 } 3325 3326 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3327 3328 bool HasLastprivateClause; 3329 // Check pre-condition. 3330 { 3331 OMPLoopScope PreInitScope(*this, S); 3332 // Skip the entire loop if we don't meet the precondition. 3333 // If the condition constant folds and can be elided, avoid emitting the 3334 // whole loop. 3335 bool CondConstant; 3336 llvm::BasicBlock *ContBlock = nullptr; 3337 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3338 if (!CondConstant) 3339 return false; 3340 } else { 3341 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 3342 ContBlock = createBasicBlock("omp.precond.end"); 3343 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3344 getProfileCount(&S)); 3345 EmitBlock(ThenBlock); 3346 incrementProfileCounter(&S); 3347 } 3348 3349 RunCleanupsScope DoacrossCleanupScope(*this); 3350 bool Ordered = false; 3351 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 3352 if (OrderedClause->getNumForLoops()) 3353 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 3354 else 3355 Ordered = true; 3356 } 3357 3358 llvm::DenseSet<const Expr *> EmittedFinals; 3359 emitAlignedClause(*this, S); 3360 bool HasLinears = EmitOMPLinearClauseInit(S); 3361 // Emit helper vars inits. 3362 3363 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 3364 LValue LB = Bounds.first; 3365 LValue UB = Bounds.second; 3366 LValue ST = 3367 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3368 LValue IL = 3369 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3370 3371 // Emit 'then' code. 3372 { 3373 OMPPrivateScope LoopScope(*this); 3374 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 3375 // Emit implicit barrier to synchronize threads and avoid data races on 3376 // initialization of firstprivate variables and post-update of 3377 // lastprivate variables. 3378 CGM.getOpenMPRuntime().emitBarrierCall( 3379 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3380 /*ForceSimpleCall=*/true); 3381 } 3382 EmitOMPPrivateClause(S, LoopScope); 3383 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 3384 *this, S, EmitLValue(S.getIterationVariable())); 3385 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3386 EmitOMPReductionClauseInit(S, LoopScope); 3387 EmitOMPPrivateLoopCounters(S, LoopScope); 3388 EmitOMPLinearClause(S, LoopScope); 3389 (void)LoopScope.Privatize(); 3390 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3391 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 3392 3393 // Detect the loop schedule kind and chunk. 3394 const Expr *ChunkExpr = nullptr; 3395 OpenMPScheduleTy ScheduleKind; 3396 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 3397 ScheduleKind.Schedule = C->getScheduleKind(); 3398 ScheduleKind.M1 = C->getFirstScheduleModifier(); 3399 ScheduleKind.M2 = C->getSecondScheduleModifier(); 3400 ChunkExpr = C->getChunkSize(); 3401 } else { 3402 // Default behaviour for schedule clause. 
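// (Target runtimes may override the default; e.g. the GPU runtime may
// prefer a chunked static schedule. See the getDefaultScheduleAndChunk
// overrides.)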
3403 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 3404 *this, S, ScheduleKind.Schedule, ChunkExpr); 3405 } 3406 bool HasChunkSizeOne = false; 3407 llvm::Value *Chunk = nullptr; 3408 if (ChunkExpr) { 3409 Chunk = EmitScalarExpr(ChunkExpr); 3410 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 3411 S.getIterationVariable()->getType(), 3412 S.getBeginLoc()); 3413 Expr::EvalResult Result; 3414 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 3415 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 3416 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 3417 } 3418 } 3419 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3420 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3421 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 3422 // If the static schedule kind is specified or if the ordered clause is 3423 // specified, and if no monotonic modifier is specified, the effect will 3424 // be as if the monotonic modifier was specified. 3425 bool StaticChunkedOne = 3426 RT.isStaticChunked(ScheduleKind.Schedule, 3427 /* Chunked */ Chunk != nullptr) && 3428 HasChunkSizeOne && 3429 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3430 bool IsMonotonic = 3431 Ordered || 3432 (ScheduleKind.Schedule == OMPC_SCHEDULE_static && 3433 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || 3434 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || 3435 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 3436 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 3437 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 3438 /* Chunked */ Chunk != nullptr) || 3439 StaticChunkedOne) && 3440 !Ordered) { 3441 JumpDest LoopExit = 3442 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3443 emitCommonSimdLoop( 3444 *this, S, 3445 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3446 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3447 CGF.EmitOMPSimdInit(S); 3448 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 3449 if (C->getKind() == OMPC_ORDER_concurrent) 3450 CGF.LoopStack.setParallel(/*Enable=*/true); 3451 } 3452 }, 3453 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 3454 &S, ScheduleKind, LoopExit, 3455 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 3456 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3457 // When no chunk_size is specified, the iteration space is divided 3458 // into chunks that are approximately equal in size, and at most 3459 // one chunk is distributed to each thread. Note that the size of 3460 // the chunks is unspecified in this case. 3461 CGOpenMPRuntime::StaticRTInput StaticInit( 3462 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 3463 UB.getAddress(), ST.getAddress(), 3464 StaticChunkedOne ? Chunk : nullptr); 3465 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 3466 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, 3467 StaticInit); 3468 // UB = min(UB, GlobalUB); 3469 if (!StaticChunkedOne) 3470 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 3471 // IV = LB; 3472 CGF.EmitIgnoredExpr(S.getInit()); 3473 // For unchunked static schedule generate: 3474 // 3475 // while (idx <= UB) { 3476 // BODY; 3477 // ++idx; 3478 // } 3479 // 3480 // For static schedule with chunk one: 3481 // 3482 // while (IV <= PrevUB) { 3483 // BODY; 3484 // IV += ST; 3485 // } 3486 CGF.EmitOMPInnerLoop( 3487 S, LoopScope.requiresCleanups(), 3488 StaticChunkedOne ? 
S.getCombinedParForInDistCond() 3489 : S.getCond(), 3490 StaticChunkedOne ? S.getDistInc() : S.getInc(), 3491 [&S, LoopExit](CodeGenFunction &CGF) { 3492 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); 3493 }, 3494 [](CodeGenFunction &) {}); 3495 }); 3496 EmitBlock(LoopExit.getBlock()); 3497 // Tell the runtime we are done. 3498 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 3499 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3500 OMPD_for); 3501 }; 3502 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 3503 } else { 3504 // Emit the outer loop, which requests its work chunk [LB..UB] from 3505 // runtime and runs the inner loop to process it. 3506 OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 3507 ST.getAddress(), IL.getAddress(), Chunk, 3508 EUB); 3509 LoopArguments.DKind = OMPD_for; 3510 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 3511 LoopArguments, CGDispatchBounds); 3512 } 3513 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3514 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 3515 return CGF.Builder.CreateIsNotNull( 3516 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3517 }); 3518 } 3519 EmitOMPReductionClauseFinal( 3520 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 3521 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 3522 : /*Parallel only*/ OMPD_parallel); 3523 // Emit post-update of the reduction variables if IsLastIter != 0. 3524 emitPostUpdateForReductionClause( 3525 *this, S, [IL, &S](CodeGenFunction &CGF) { 3526 return CGF.Builder.CreateIsNotNull( 3527 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3528 }); 3529 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3530 if (HasLastprivateClause) 3531 EmitOMPLastprivateClauseFinal( 3532 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3533 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 3534 LoopScope.restoreMap(); 3535 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 3536 return CGF.Builder.CreateIsNotNull( 3537 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3538 }); 3539 } 3540 DoacrossCleanupScope.ForceCleanup(); 3541 // We're now done with the loop, so jump to the continuation block. 3542 if (ContBlock) { 3543 EmitBranch(ContBlock); 3544 EmitBlock(ContBlock, /*IsFinished=*/true); 3545 } 3546 } 3547 return HasLastprivateClause; 3548 } 3549 3550 /// The following two functions generate expressions for the loop lower 3551 /// and upper bounds in case of static and dynamic (dispatch) schedule 3552 /// of the associated 'for' or 'distribute' loop. 3553 static std::pair<LValue, LValue> 3554 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 3555 const auto &LS = cast<OMPLoopDirective>(S); 3556 LValue LB = 3557 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 3558 LValue UB = 3559 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 3560 return {LB, UB}; 3561 } 3562 3563 /// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not
3564 /// consider the lower and upper bound expressions generated by the
3565 /// worksharing loop support, but we use 0 and the iteration space size as
3566 /// constants.
3567 static std::pair<llvm::Value *, llvm::Value *>
3568 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3569 Address LB, Address UB) {
3570 const auto &LS = cast<OMPLoopDirective>(S);
3571 const Expr *IVExpr = LS.getIterationVariable();
3572 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3573 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3574 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3575 return {LBVal, UBVal};
3576 }
3577
3578 /// Emits internal temp array declarations for the directive with inscan
3579 /// reductions.
3580 /// The code is the following:
3581 /// \code
3582 /// size num_iters = <num_iters>;
3583 /// <type> buffer[num_iters];
3584 /// \endcode
3585 static void emitScanBasedDirectiveDecls(
3586 CodeGenFunction &CGF, const OMPLoopDirective &S,
3587 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3588 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3589 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3590 SmallVector<const Expr *, 4> Shareds;
3591 SmallVector<const Expr *, 4> Privates;
3592 SmallVector<const Expr *, 4> ReductionOps;
3593 SmallVector<const Expr *, 4> CopyArrayTemps;
3594 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3595 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3596 "Only inscan reductions are expected.");
3597 Shareds.append(C->varlist_begin(), C->varlist_end());
3598 Privates.append(C->privates().begin(), C->privates().end());
3599 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3600 CopyArrayTemps.append(C->copy_array_temps().begin(),
3601 C->copy_array_temps().end());
3602 }
3603 {
3604 // Emit buffers for each reduction variable.
3605 // ReductionCodeGen is required to correctly emit the code for array
3606 // reductions.
3607 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3608 unsigned Count = 0;
3609 auto *ITA = CopyArrayTemps.begin();
3610 for (const Expr *IRef : Privates) {
3611 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3612 // Emit variably modified arrays, used for arrays/array sections
3613 // reductions.
3614 if (PrivateVD->getType()->isVariablyModifiedType()) {
3615 RedCG.emitSharedOrigLValue(CGF, Count);
3616 RedCG.emitAggregateType(CGF, Count);
3617 }
3618 CodeGenFunction::OpaqueValueMapping DimMapping(
3619 CGF,
3620 cast<OpaqueValueExpr>(
3621 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3622 ->getSizeExpr()),
3623 RValue::get(OMPScanNumIterations));
3624 // Emit temp buffer.
3625 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3626 ++ITA;
3627 ++Count;
3628 }
3629 }
3630 }
3631
3632 /// Copies final inscan reduction values to the original variables.
3633 /// The code is the following:
3634 /// \code
3635 /// <orig_var> = buffer[num_iters-1];
3636 /// \endcode
3637 static void emitScanBasedDirectiveFinals(
3638 CodeGenFunction &CGF, const OMPLoopDirective &S,
3639 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3640 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3641 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3642 SmallVector<const Expr *, 4> Shareds;
3643 SmallVector<const Expr *, 4> LHSs;
3644 SmallVector<const Expr *, 4> RHSs;
3645 SmallVector<const Expr *, 4> Privates;
3646 SmallVector<const Expr *, 4> CopyOps;
3647 SmallVector<const Expr *, 4> CopyArrayElems;
3648 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3649 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3650 "Only inscan reductions are expected.");
3651 Shareds.append(C->varlist_begin(), C->varlist_end());
3652 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3653 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3654 Privates.append(C->privates().begin(), C->privates().end());
3655 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3656 CopyArrayElems.append(C->copy_array_elems().begin(),
3657 C->copy_array_elems().end());
3658 }
3659 // Copy the final scan value from the temp buffer to the original variable:
3660 // <orig_var> = buffer[num_iters - 1];
3661 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3662 OMPScanNumIterations,
3663 llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3664 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3665 const Expr *PrivateExpr = Privates[I];
3666 const Expr *OrigExpr = Shareds[I];
3667 const Expr *CopyArrayElem = CopyArrayElems[I];
3668 CodeGenFunction::OpaqueValueMapping IdxMapping(
3669 CGF,
3670 cast<OpaqueValueExpr>(
3671 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3672 RValue::get(OMPLast));
3673 LValue DestLVal = CGF.EmitLValue(OrigExpr);
3674 LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3675 CGF.EmitOMPCopy(
3676 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
3677 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3678 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
3679 }
3680 }
3681
3682 /// Emits the code for the directive with inscan reductions.
3683 /// The code is the following:
3684 /// \code
3685 /// #pragma omp ...
3686 /// for (i: 0..<num_iters>) {
3687 /// <input phase>;
3688 /// buffer[i] = red;
3689 /// }
3690 /// #pragma omp master // in parallel region
3691 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3692 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3693 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3694 /// #pragma omp barrier // in parallel region
3695 /// #pragma omp ...
3696 /// for (0..<num_iters>) {
3697 /// red = InclusiveScan ?
buffer[i] : buffer[i-1];
3698 /// <scan phase>;
3699 /// }
3700 /// \endcode
3701 static void emitScanBasedDirective(
3702 CodeGenFunction &CGF, const OMPLoopDirective &S,
3703 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3704 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3705 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3706 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3707 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3708 SmallVector<const Expr *, 4> Privates;
3709 SmallVector<const Expr *, 4> ReductionOps;
3710 SmallVector<const Expr *, 4> LHSs;
3711 SmallVector<const Expr *, 4> RHSs;
3712 SmallVector<const Expr *, 4> CopyArrayElems;
3713 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3714 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3715 "Only inscan reductions are expected.");
3716 Privates.append(C->privates().begin(), C->privates().end());
3717 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3718 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3719 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3720 CopyArrayElems.append(C->copy_array_elems().begin(),
3721 C->copy_array_elems().end());
3722 }
3723 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3724 {
3725 // Emit loop with input phase:
3726 // #pragma omp ...
3727 // for (i: 0..<num_iters>) {
3728 // <input phase>;
3729 // buffer[i] = red;
3730 // }
3731 CGF.OMPFirstScanLoop = true;
3732 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3733 FirstGen(CGF);
3734 }
3735 // #pragma omp barrier // in parallel region
3736 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3737 &ReductionOps,
3738 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3739 Action.Enter(CGF);
3740 // Emit prefix reduction:
3741 // #pragma omp master // in parallel region
3742 // for (int k = 0; k < ceil(log2(n)); ++k)
3743 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3744 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3745 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3746 llvm::Function *F =
3747 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3748 llvm::Value *Arg =
3749 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3750 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3751 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3752 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3753 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3754 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3755 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3756 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3757 CGF.EmitBlock(LoopBB);
3758 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3759 // size pow2k = 1;
3760 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3761 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3762 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3763 // for (size i = n - 1; i >= 2 ^ k; --i)
3764 // tmp[i] op= tmp[i-pow2k];
3765 llvm::BasicBlock *InnerLoopBB =
3766 CGF.createBasicBlock("omp.inner.log.scan.body");
3767 llvm::BasicBlock *InnerExitBB =
3768 CGF.createBasicBlock("omp.inner.log.scan.exit");
3769 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3770 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3771
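// Inner loop body (sketch): remap the reduction LHS/RHS to buffer[i] and
// buffer[i - pow2k], then combine them with the reduction operation.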
CGF.EmitBlock(InnerLoopBB); 3772 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); 3773 IVal->addIncoming(NMin1, LoopBB); 3774 { 3775 CodeGenFunction::OMPPrivateScope PrivScope(CGF); 3776 auto *ILHS = LHSs.begin(); 3777 auto *IRHS = RHSs.begin(); 3778 for (const Expr *CopyArrayElem : CopyArrayElems) { 3779 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3780 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3781 Address LHSAddr = Address::invalid(); 3782 { 3783 CodeGenFunction::OpaqueValueMapping IdxMapping( 3784 CGF, 3785 cast<OpaqueValueExpr>( 3786 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3787 RValue::get(IVal)); 3788 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(); 3789 } 3790 PrivScope.addPrivate(LHSVD, LHSAddr); 3791 Address RHSAddr = Address::invalid(); 3792 { 3793 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); 3794 CodeGenFunction::OpaqueValueMapping IdxMapping( 3795 CGF, 3796 cast<OpaqueValueExpr>( 3797 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3798 RValue::get(OffsetIVal)); 3799 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(); 3800 } 3801 PrivScope.addPrivate(RHSVD, RHSAddr); 3802 ++ILHS; 3803 ++IRHS; 3804 } 3805 PrivScope.Privatize(); 3806 CGF.CGM.getOpenMPRuntime().emitReduction( 3807 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 3808 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); 3809 } 3810 llvm::Value *NextIVal = 3811 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3812 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); 3813 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); 3814 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3815 CGF.EmitBlock(InnerExitBB); 3816 llvm::Value *Next = 3817 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); 3818 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); 3819 // pow2k <<= 1; 3820 llvm::Value *NextPow2K = 3821 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); 3822 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); 3823 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); 3824 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); 3825 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); 3826 CGF.EmitBlock(ExitBB); 3827 }; 3828 if (isOpenMPParallelDirective(S.getDirectiveKind())) { 3829 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 3830 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3831 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3832 /*ForceSimpleCall=*/true); 3833 } else { 3834 RegionCodeGenTy RCG(CodeGen); 3835 RCG(CGF); 3836 } 3837 3838 CGF.OMPFirstScanLoop = false; 3839 SecondGen(CGF); 3840 } 3841 3842 static bool emitWorksharingDirective(CodeGenFunction &CGF, 3843 const OMPLoopDirective &S, 3844 bool HasCancel) { 3845 bool HasLastprivates; 3846 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 3847 [](const OMPReductionClause *C) { 3848 return C->getModifier() == OMPC_REDUCTION_inscan; 3849 })) { 3850 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 3851 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 3852 OMPLoopScope LoopScope(CGF, S); 3853 return CGF.EmitScalarExpr(S.getNumIterations()); 3854 }; 3855 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { 3856 CodeGenFunction::OMPCancelStackRAII CancelRegion( 3857 CGF, S.getDirectiveKind(), HasCancel); 3858 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3859 
emitForLoopBounds, 3860 emitDispatchForLoopBounds); 3861 // Emit an implicit barrier at the end. 3862 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), 3863 OMPD_for); 3864 }; 3865 const auto &&SecondGen = [&S, HasCancel, 3866 &HasLastprivates](CodeGenFunction &CGF) { 3867 CodeGenFunction::OMPCancelStackRAII CancelRegion( 3868 CGF, S.getDirectiveKind(), HasCancel); 3869 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3870 emitForLoopBounds, 3871 emitDispatchForLoopBounds); 3872 }; 3873 if (!isOpenMPParallelDirective(S.getDirectiveKind())) 3874 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); 3875 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); 3876 if (!isOpenMPParallelDirective(S.getDirectiveKind())) 3877 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); 3878 } else { 3879 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), 3880 HasCancel); 3881 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3882 emitForLoopBounds, 3883 emitDispatchForLoopBounds); 3884 } 3885 return HasLastprivates; 3886 } 3887 3888 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { 3889 if (S.hasCancel()) 3890 return false; 3891 for (OMPClause *C : S.clauses()) { 3892 if (isa<OMPNowaitClause>(C)) 3893 continue; 3894 3895 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { 3896 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3897 return false; 3898 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3899 return false; 3900 switch (SC->getScheduleKind()) { 3901 case OMPC_SCHEDULE_auto: 3902 case OMPC_SCHEDULE_dynamic: 3903 case OMPC_SCHEDULE_runtime: 3904 case OMPC_SCHEDULE_guided: 3905 case OMPC_SCHEDULE_static: 3906 continue; 3907 case OMPC_SCHEDULE_unknown: 3908 return false; 3909 } 3910 } 3911 3912 return false; 3913 } 3914 3915 return true; 3916 } 3917 3918 static llvm::omp::ScheduleKind 3919 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { 3920 switch (ScheduleClauseKind) { 3921 case OMPC_SCHEDULE_unknown: 3922 return llvm::omp::OMP_SCHEDULE_Default; 3923 case OMPC_SCHEDULE_auto: 3924 return llvm::omp::OMP_SCHEDULE_Auto; 3925 case OMPC_SCHEDULE_dynamic: 3926 return llvm::omp::OMP_SCHEDULE_Dynamic; 3927 case OMPC_SCHEDULE_guided: 3928 return llvm::omp::OMP_SCHEDULE_Guided; 3929 case OMPC_SCHEDULE_runtime: 3930 return llvm::omp::OMP_SCHEDULE_Runtime; 3931 case OMPC_SCHEDULE_static: 3932 return llvm::omp::OMP_SCHEDULE_Static; 3933 } 3934 llvm_unreachable("Unhandled schedule kind"); 3935 } 3936 3937 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 3938 bool HasLastprivates = false; 3939 bool UseOMPIRBuilder = 3940 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); 3941 auto &&CodeGen = [this, &S, &HasLastprivates, 3942 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { 3943 // Use the OpenMPIRBuilder if enabled. 3944 if (UseOMPIRBuilder) { 3945 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); 3946 3947 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; 3948 llvm::Value *ChunkSize = nullptr; 3949 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { 3950 SchedKind = 3951 convertClauseKindToSchedKind(SchedClause->getScheduleKind()); 3952 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) 3953 ChunkSize = EmitScalarExpr(ChunkSizeExpr); 3954 } 3955 3956 // Emit the associated statement and get its loop representation. 
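// (A CanonicalLoopInfo is, roughly, OpenMPIRBuilder's normalized view of
// the loop: trip count, induction variable, and the control blocks that
// applyWorkshareLoop rewires below.)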
3957 const Stmt *Inner = S.getRawStmt(); 3958 llvm::CanonicalLoopInfo *CLI = 3959 EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 3960 3961 llvm::OpenMPIRBuilder &OMPBuilder = 3962 CGM.getOpenMPRuntime().getOMPBuilder(); 3963 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 3964 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 3965 OMPBuilder.applyWorkshareLoop( 3966 Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, 3967 SchedKind, ChunkSize, /*HasSimdModifier=*/false, 3968 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, 3969 /*HasOrderedClause=*/false); 3970 return; 3971 } 3972 3973 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); 3974 }; 3975 { 3976 auto LPCRegion = 3977 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3978 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3979 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 3980 S.hasCancel()); 3981 } 3982 3983 if (!UseOMPIRBuilder) { 3984 // Emit an implicit barrier at the end. 3985 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3986 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3987 } 3988 // Check for outer lastprivate conditional update. 3989 checkForLastprivateConditionalUpdate(*this, S); 3990 } 3991 3992 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 3993 bool HasLastprivates = false; 3994 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3995 PrePostActionTy &) { 3996 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); 3997 }; 3998 { 3999 auto LPCRegion = 4000 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4001 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4002 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 4003 } 4004 4005 // Emit an implicit barrier at the end. 4006 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 4007 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 4008 // Check for outer lastprivate conditional update. 4009 checkForLastprivateConditionalUpdate(*this, S); 4010 } 4011 4012 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 4013 const Twine &Name, 4014 llvm::Value *Init = nullptr) { 4015 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 4016 if (Init) 4017 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 4018 return LVal; 4019 } 4020 4021 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 4022 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 4023 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 4024 bool HasLastprivates = false; 4025 auto &&CodeGen = [&S, CapturedStmt, CS, 4026 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 4027 const ASTContext &C = CGF.getContext(); 4028 QualType KmpInt32Ty = 4029 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4030 // Emit helper vars inits. 4031 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 4032 CGF.Builder.getInt32(0)); 4033 llvm::ConstantInt *GlobalUBVal = CS != nullptr 4034 ? 
CGF.Builder.getInt32(CS->size() - 1) 4035 : CGF.Builder.getInt32(0); 4036 LValue UB = 4037 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 4038 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 4039 CGF.Builder.getInt32(1)); 4040 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 4041 CGF.Builder.getInt32(0)); 4042 // Loop counter. 4043 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 4044 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 4045 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 4046 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 4047 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 4048 // Generate condition for loop. 4049 BinaryOperator *Cond = BinaryOperator::Create( 4050 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, 4051 S.getBeginLoc(), FPOptionsOverride()); 4052 // Increment for loop counter. 4053 UnaryOperator *Inc = UnaryOperator::Create( 4054 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, 4055 S.getBeginLoc(), true, FPOptionsOverride()); 4056 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 4057 // Iterate through all sections and emit a switch construct: 4058 // switch (IV) { 4059 // case 0: 4060 // <SectionStmt[0]>; 4061 // break; 4062 // ... 4063 // case <NumSection> - 1: 4064 // <SectionStmt[<NumSection> - 1]>; 4065 // break; 4066 // } 4067 // .omp.sections.exit: 4068 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 4069 llvm::SwitchInst *SwitchStmt = 4070 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 4071 ExitBB, CS == nullptr ? 1 : CS->size()); 4072 if (CS) { 4073 unsigned CaseNumber = 0; 4074 for (const Stmt *SubStmt : CS->children()) { 4075 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 4076 CGF.EmitBlock(CaseBB); 4077 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 4078 CGF.EmitStmt(SubStmt); 4079 CGF.EmitBranch(ExitBB); 4080 ++CaseNumber; 4081 } 4082 } else { 4083 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 4084 CGF.EmitBlock(CaseBB); 4085 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 4086 CGF.EmitStmt(CapturedStmt); 4087 CGF.EmitBranch(ExitBB); 4088 } 4089 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 4090 }; 4091 4092 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 4093 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 4094 // Emit implicit barrier to synchronize threads and avoid data races on 4095 // initialization of firstprivate variables and post-update of lastprivate 4096 // variables. 4097 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 4098 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 4099 /*ForceSimpleCall=*/true); 4100 } 4101 CGF.EmitOMPPrivateClause(S, LoopScope); 4102 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 4103 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4104 CGF.EmitOMPReductionClauseInit(S, LoopScope); 4105 (void)LoopScope.Privatize(); 4106 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4107 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4108 4109 // Emit static non-chunked loop. 
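// (Illustrative only: with 3 sections and 2 threads, a typical static
// non-chunked split gives thread 0 the section indices {0,1} and thread 1
// the index {2}; the exact split is up to the runtime.)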
4110 OpenMPScheduleTy ScheduleKind; 4111 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 4112 CGOpenMPRuntime::StaticRTInput StaticInit( 4113 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 4114 LB.getAddress(), UB.getAddress(), ST.getAddress()); 4115 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 4116 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); 4117 // UB = min(UB, GlobalUB); 4118 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); 4119 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( 4120 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 4121 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 4122 // IV = LB; 4123 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); 4124 // while (idx <= UB) { BODY; ++idx; } 4125 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, 4126 [](CodeGenFunction &) {}); 4127 // Tell the runtime we are done. 4128 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 4129 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 4130 OMPD_sections); 4131 }; 4132 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 4133 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4134 // Emit post-update of the reduction variables if IsLastIter != 0. 4135 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { 4136 return CGF.Builder.CreateIsNotNull( 4137 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 4138 }); 4139 4140 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4141 if (HasLastprivates) 4142 CGF.EmitOMPLastprivateClauseFinal( 4143 S, /*NoFinals=*/false, 4144 CGF.Builder.CreateIsNotNull( 4145 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 4146 }; 4147 4148 bool HasCancel = false; 4149 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 4150 HasCancel = OSD->hasCancel(); 4151 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 4152 HasCancel = OPSD->hasCancel(); 4153 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 4154 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 4155 HasCancel); 4156 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 4157 // clause. Otherwise the barrier will be generated by the codegen for the 4158 // directive. 4159 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 4160 // Emit implicit barrier to synchronize threads and avoid data races on 4161 // initialization of firstprivate variables. 
4162 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4163 OMPD_unknown); 4164 } 4165 } 4166 4167 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 4168 if (CGM.getLangOpts().OpenMPIRBuilder) { 4169 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4170 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4171 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 4172 4173 auto FiniCB = [this](InsertPointTy IP) { 4174 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4175 }; 4176 4177 const CapturedStmt *ICS = S.getInnermostCapturedStmt(); 4178 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 4179 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 4180 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 4181 if (CS) { 4182 for (const Stmt *SubStmt : CS->children()) { 4183 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, 4184 InsertPointTy CodeGenIP) { 4185 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4186 *this, SubStmt, AllocaIP, CodeGenIP, "section"); 4187 }; 4188 SectionCBVector.push_back(SectionCB); 4189 } 4190 } else { 4191 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, 4192 InsertPointTy CodeGenIP) { 4193 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4194 *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); 4195 }; 4196 SectionCBVector.push_back(SectionCB); 4197 } 4198 4199 // Privatization callback that performs appropriate action for 4200 // shared/private/firstprivate/lastprivate/copyin/... variables. 4201 // 4202 // TODO: This defaults to shared right now. 4203 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 4204 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 4205 // The next line is appropriate only for variables (Val) with the 4206 // data-sharing attribute "shared". 4207 ReplVal = &Val; 4208 4209 return CodeGenIP; 4210 }; 4211 4212 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); 4213 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 4214 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 4215 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 4216 Builder.restoreIP(OMPBuilder.createSections( 4217 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), 4218 S.getSingleClause<OMPNowaitClause>())); 4219 return; 4220 } 4221 { 4222 auto LPCRegion = 4223 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4224 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4225 EmitSections(S); 4226 } 4227 // Emit an implicit barrier at the end. 4228 if (!S.getSingleClause<OMPNowaitClause>()) { 4229 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4230 OMPD_sections); 4231 } 4232 // Check for outer lastprivate conditional update. 
4233 checkForLastprivateConditionalUpdate(*this, S); 4234 } 4235 4236 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 4237 if (CGM.getLangOpts().OpenMPIRBuilder) { 4238 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4239 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4240 4241 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); 4242 auto FiniCB = [this](InsertPointTy IP) { 4243 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4244 }; 4245 4246 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, 4247 InsertPointTy CodeGenIP) { 4248 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4249 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); 4250 }; 4251 4252 LexicalScope Scope(*this, S.getSourceRange()); 4253 EmitStopPoint(&S); 4254 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); 4255 4256 return; 4257 } 4258 LexicalScope Scope(*this, S.getSourceRange()); 4259 EmitStopPoint(&S); 4260 EmitStmt(S.getAssociatedStmt()); 4261 } 4262 4263 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 4264 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 4265 llvm::SmallVector<const Expr *, 8> DestExprs; 4266 llvm::SmallVector<const Expr *, 8> SrcExprs; 4267 llvm::SmallVector<const Expr *, 8> AssignmentOps; 4268 // Check if there are any 'copyprivate' clauses associated with this 4269 // 'single' construct. 4270 // Build a list of copyprivate variables along with helper expressions 4271 // (<source>, <destination>, <destination>=<source> expressions) 4272 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 4273 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 4274 DestExprs.append(C->destination_exprs().begin(), 4275 C->destination_exprs().end()); 4276 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 4277 AssignmentOps.append(C->assignment_ops().begin(), 4278 C->assignment_ops().end()); 4279 } 4280 // Emit code for 'single' region along with 'copyprivate' clauses 4281 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4282 Action.Enter(CGF); 4283 OMPPrivateScope SingleScope(CGF); 4284 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 4285 CGF.EmitOMPPrivateClause(S, SingleScope); 4286 (void)SingleScope.Privatize(); 4287 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4288 }; 4289 { 4290 auto LPCRegion = 4291 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4292 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4293 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 4294 CopyprivateVars, DestExprs, 4295 SrcExprs, AssignmentOps); 4296 } 4297 // Emit an implicit barrier at the end (to avoid data race on firstprivate 4298 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 4299 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 4300 CGM.getOpenMPRuntime().emitBarrierCall( 4301 *this, S.getBeginLoc(), 4302 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 4303 } 4304 // Check for outer lastprivate conditional update. 
4305 checkForLastprivateConditionalUpdate(*this, S); 4306 } 4307 4308 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4309 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4310 Action.Enter(CGF); 4311 CGF.EmitStmt(S.getRawStmt()); 4312 }; 4313 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 4314 } 4315 4316 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 4317 if (CGM.getLangOpts().OpenMPIRBuilder) { 4318 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4319 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4320 4321 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); 4322 4323 auto FiniCB = [this](InsertPointTy IP) { 4324 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4325 }; 4326 4327 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, 4328 InsertPointTy CodeGenIP) { 4329 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4330 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); 4331 }; 4332 4333 LexicalScope Scope(*this, S.getSourceRange()); 4334 EmitStopPoint(&S); 4335 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 4336 4337 return; 4338 } 4339 LexicalScope Scope(*this, S.getSourceRange()); 4340 EmitStopPoint(&S); 4341 emitMaster(*this, S); 4342 } 4343 4344 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4345 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4346 Action.Enter(CGF); 4347 CGF.EmitStmt(S.getRawStmt()); 4348 }; 4349 Expr *Filter = nullptr; 4350 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4351 Filter = FilterClause->getThreadID(); 4352 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), 4353 Filter); 4354 } 4355 4356 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { 4357 if (CGM.getLangOpts().OpenMPIRBuilder) { 4358 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4359 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4360 4361 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); 4362 const Expr *Filter = nullptr; 4363 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4364 Filter = FilterClause->getThreadID(); 4365 llvm::Value *FilterVal = Filter 4366 ? 
EmitScalarExpr(Filter, CGM.Int32Ty) 4367 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 4368 4369 auto FiniCB = [this](InsertPointTy IP) { 4370 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4371 }; 4372 4373 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, 4374 InsertPointTy CodeGenIP) { 4375 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4376 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); 4377 }; 4378 4379 LexicalScope Scope(*this, S.getSourceRange()); 4380 EmitStopPoint(&S); 4381 Builder.restoreIP( 4382 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); 4383 4384 return; 4385 } 4386 LexicalScope Scope(*this, S.getSourceRange()); 4387 EmitStopPoint(&S); 4388 emitMasked(*this, S); 4389 } 4390 4391 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 4392 if (CGM.getLangOpts().OpenMPIRBuilder) { 4393 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4394 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4395 4396 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); 4397 const Expr *Hint = nullptr; 4398 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4399 Hint = HintClause->getHint(); 4400 4401 // TODO: This is slightly different from what's currently being done in 4402 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything 4403 // about typing is final. 4404 llvm::Value *HintInst = nullptr; 4405 if (Hint) 4406 HintInst = 4407 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); 4408 4409 auto FiniCB = [this](InsertPointTy IP) { 4410 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4411 }; 4412 4413 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, 4414 InsertPointTy CodeGenIP) { 4415 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4416 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); 4417 }; 4418 4419 LexicalScope Scope(*this, S.getSourceRange()); 4420 EmitStopPoint(&S); 4421 Builder.restoreIP(OMPBuilder.createCritical( 4422 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), 4423 HintInst)); 4424 4425 return; 4426 } 4427 4428 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4429 Action.Enter(CGF); 4430 CGF.EmitStmt(S.getAssociatedStmt()); 4431 }; 4432 const Expr *Hint = nullptr; 4433 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4434 Hint = HintClause->getHint(); 4435 LexicalScope Scope(*this, S.getSourceRange()); 4436 EmitStopPoint(&S); 4437 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 4438 S.getDirectiveName().getAsString(), 4439 CodeGen, S.getBeginLoc(), Hint); 4440 } 4441 4442 void CodeGenFunction::EmitOMPParallelForDirective( 4443 const OMPParallelForDirective &S) { 4444 // Emit directive as a combined directive that consists of two implicit 4445 // directives: 'parallel' with 'for' directive. 
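// Illustrative decomposition (a sketch, not literal output): conceptually
//   #pragma omp parallel for [copyin(...)] ...
// is emitted as an outlined parallel region whose body runs the
// worksharing loop:
//   #pragma omp parallel [copyin(...)]
//   {
//     #pragma omp for ...
//     <loop body>
//   }
// with the loop itself produced by emitWorksharingDirective inside the
// outlined region.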
4446 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4447 Action.Enter(CGF); 4448 emitOMPCopyinClause(CGF, S); 4449 (void)emitWorksharingDirective(CGF, S, S.hasCancel()); 4450 }; 4451 { 4452 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 4453 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 4454 CGCapturedStmtInfo CGSI(CR_OpenMP); 4455 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); 4456 OMPLoopScope LoopScope(CGF, S); 4457 return CGF.EmitScalarExpr(S.getNumIterations()); 4458 }; 4459 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 4460 [](const OMPReductionClause *C) { 4461 return C->getModifier() == OMPC_REDUCTION_inscan; 4462 }); 4463 if (IsInscan) 4464 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); 4465 auto LPCRegion = 4466 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4467 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 4468 emitEmptyBoundParameters); 4469 if (IsInscan) 4470 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); 4471 } 4472 // Check for outer lastprivate conditional update. 4473 checkForLastprivateConditionalUpdate(*this, S); 4474 } 4475 4476 void CodeGenFunction::EmitOMPParallelForSimdDirective( 4477 const OMPParallelForSimdDirective &S) { 4478 // Emit directive as a combined directive that consists of two implicit 4479 // directives: 'parallel' with 'for' directive. 4480 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4481 Action.Enter(CGF); 4482 emitOMPCopyinClause(CGF, S); 4483 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); 4484 }; 4485 { 4486 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 4487 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 4488 CGCapturedStmtInfo CGSI(CR_OpenMP); 4489 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); 4490 OMPLoopScope LoopScope(CGF, S); 4491 return CGF.EmitScalarExpr(S.getNumIterations()); 4492 }; 4493 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 4494 [](const OMPReductionClause *C) { 4495 return C->getModifier() == OMPC_REDUCTION_inscan; 4496 }); 4497 if (IsInscan) 4498 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); 4499 auto LPCRegion = 4500 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4501 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, 4502 emitEmptyBoundParameters); 4503 if (IsInscan) 4504 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); 4505 } 4506 // Check for outer lastprivate conditional update. 4507 checkForLastprivateConditionalUpdate(*this, S); 4508 } 4509 4510 void CodeGenFunction::EmitOMPParallelMasterDirective( 4511 const OMPParallelMasterDirective &S) { 4512 // Emit directive as a combined directive that consists of two implicit 4513 // directives: 'parallel' with 'master' directive. 
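// Sketch of the decomposition (illustrative only): a construct such as
//   #pragma omp parallel master private(a) reduction(+: r)
// is handled as a parallel region that privatizes the clauses once, with
// the body guarded so that only the master thread of each team executes
// it; the reduction is finalized at the parallel level.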
4514 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4515 Action.Enter(CGF); 4516 OMPPrivateScope PrivateScope(CGF); 4517 emitOMPCopyinClause(CGF, S); 4518 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4519 CGF.EmitOMPPrivateClause(S, PrivateScope); 4520 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4521 (void)PrivateScope.Privatize(); 4522 emitMaster(CGF, S); 4523 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4524 }; 4525 { 4526 auto LPCRegion = 4527 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4528 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, 4529 emitEmptyBoundParameters); 4530 emitPostUpdateForReductionClause(*this, S, 4531 [](CodeGenFunction &) { return nullptr; }); 4532 } 4533 // Check for outer lastprivate conditional update. 4534 checkForLastprivateConditionalUpdate(*this, S); 4535 } 4536 4537 void CodeGenFunction::EmitOMPParallelMaskedDirective( 4538 const OMPParallelMaskedDirective &S) { 4539 // Emit directive as a combined directive that consists of two implicit 4540 // directives: 'parallel' with 'masked' directive. 4541 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4542 Action.Enter(CGF); 4543 OMPPrivateScope PrivateScope(CGF); 4544 emitOMPCopyinClause(CGF, S); 4545 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4546 CGF.EmitOMPPrivateClause(S, PrivateScope); 4547 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4548 (void)PrivateScope.Privatize(); 4549 emitMasked(CGF, S); 4550 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4551 }; 4552 { 4553 auto LPCRegion = 4554 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4555 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, 4556 emitEmptyBoundParameters); 4557 emitPostUpdateForReductionClause(*this, S, 4558 [](CodeGenFunction &) { return nullptr; }); 4559 } 4560 // Check for outer lastprivate conditional update. 4561 checkForLastprivateConditionalUpdate(*this, S); 4562 } 4563 4564 void CodeGenFunction::EmitOMPParallelSectionsDirective( 4565 const OMPParallelSectionsDirective &S) { 4566 // Emit directive as a combined directive that consists of two implicit 4567 // directives: 'parallel' with 'sections' directive. 4568 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4569 Action.Enter(CGF); 4570 emitOMPCopyinClause(CGF, S); 4571 CGF.EmitSections(S); 4572 }; 4573 { 4574 auto LPCRegion = 4575 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4576 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 4577 emitEmptyBoundParameters); 4578 } 4579 // Check for outer lastprivate conditional update. 4580 checkForLastprivateConditionalUpdate(*this, S); 4581 } 4582 4583 namespace { 4584 /// Get the list of variables declared in the context of the untied tasks. 4585 class CheckVarsEscapingUntiedTaskDeclContext final 4586 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { 4587 llvm::SmallVector<const VarDecl *, 4> PrivateDecls; 4588 4589 public: 4590 explicit CheckVarsEscapingUntiedTaskDeclContext() = default; 4591 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; 4592 void VisitDeclStmt(const DeclStmt *S) { 4593 if (!S) 4594 return; 4595 // Need to privatize only local vars, static locals can be processed as is. 
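// For example (hypothetical source), in
//   #pragma omp task untied
//   { int local = 0; static int s = 0; ... }
// 'local' must move into the task's private storage because an untied
// task may resume on another thread after a task scheduling point, while
// 's' has static storage duration and can be referenced as is.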
4596 for (const Decl *D : S->decls()) {
4597 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4598 if (VD->hasLocalStorage())
4599 PrivateDecls.push_back(VD);
4600 }
4601 }
4602 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4603 void VisitCapturedStmt(const CapturedStmt *) {}
4604 void VisitLambdaExpr(const LambdaExpr *) {}
4605 void VisitBlockExpr(const BlockExpr *) {}
4606 void VisitStmt(const Stmt *S) {
4607 if (!S)
4608 return;
4609 for (const Stmt *Child : S->children())
4610 if (Child)
4611 Visit(Child);
4612 }
4613 
4614 /// Returns the list of local variables collected for privatization.
4615 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4616 };
4617 } // anonymous namespace
4618 
4619 static void buildDependences(const OMPExecutableDirective &S,
4620 OMPTaskDataTy &Data) {
4621 
4622 // Look for 'omp_all_memory' first and, if present, add it ahead of the rest.
4623 bool OmpAllMemory = false;
4624 if (llvm::any_of(
4625 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4626 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4627 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4628 })) {
4629 OmpAllMemory = true;
4630 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4631 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4632 // simplify.
4633 OMPTaskDataTy::DependData &DD =
4634 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4635 /*IteratorExpr=*/nullptr);
4636 // Add a nullptr Expr to simplify the codegen in emitDependData.
4637 DD.DepExprs.push_back(nullptr);
4638 }
4639 // Add remaining dependences, skipping any 'out' or 'inout' if they are
4640 // overridden by 'omp_all_memory'.
4641 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4642 OpenMPDependClauseKind Kind = C->getDependencyKind();
4643 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4644 continue;
4645 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4646 continue;
4647 OMPTaskDataTy::DependData &DD =
4648 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4649 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4650 }
4651 }
4652 
4653 void CodeGenFunction::EmitOMPTaskBasedDirective(
4654 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4655 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4656 OMPTaskDataTy &Data) {
4657 // Emit outlined function for task construct.
4658 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4659 auto I = CS->getCapturedDecl()->param_begin();
4660 auto PartId = std::next(I);
4661 auto TaskT = std::next(I, 4);
4662 // Check if the task is final.
4663 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4664 // If the condition constant folds and can be elided, try to avoid emitting
4665 // the condition and the dead arm of the if/else.
4666 const Expr *Cond = Clause->getCondition();
4667 bool CondConstant;
4668 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4669 Data.Final.setInt(CondConstant);
4670 else
4671 Data.Final.setPointer(EvaluateExprAsBool(Cond));
4672 } else {
4673 // By default the task is not final.
4674 Data.Final.setInt(/*IntVal=*/false);
4675 }
4676 // Check if the task has a 'priority' clause.
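// For reference: given e.g. '#pragma omp task priority(p)', the clause
// expression is evaluated once and converted to a signed 32-bit value
// that is later passed to the runtime as a scheduling hint; the flag in
// Data.Priority records that the clause was present at all.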
4677 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 4678 const Expr *Prio = Clause->getPriority(); 4679 Data.Priority.setInt(/*IntVal=*/true); 4680 Data.Priority.setPointer(EmitScalarConversion( 4681 EmitScalarExpr(Prio), Prio->getType(), 4682 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 4683 Prio->getExprLoc())); 4684 } 4685 // The first function argument for tasks is a thread id, the second one is a 4686 // part id (0 for tied tasks, >=0 for untied task). 4687 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 4688 // Get list of private variables. 4689 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 4690 auto IRef = C->varlist_begin(); 4691 for (const Expr *IInit : C->private_copies()) { 4692 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4693 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4694 Data.PrivateVars.push_back(*IRef); 4695 Data.PrivateCopies.push_back(IInit); 4696 } 4697 ++IRef; 4698 } 4699 } 4700 EmittedAsPrivate.clear(); 4701 // Get list of firstprivate variables. 4702 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 4703 auto IRef = C->varlist_begin(); 4704 auto IElemInitRef = C->inits().begin(); 4705 for (const Expr *IInit : C->private_copies()) { 4706 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4707 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4708 Data.FirstprivateVars.push_back(*IRef); 4709 Data.FirstprivateCopies.push_back(IInit); 4710 Data.FirstprivateInits.push_back(*IElemInitRef); 4711 } 4712 ++IRef; 4713 ++IElemInitRef; 4714 } 4715 } 4716 // Get list of lastprivate variables (for taskloops). 4717 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 4718 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 4719 auto IRef = C->varlist_begin(); 4720 auto ID = C->destination_exprs().begin(); 4721 for (const Expr *IInit : C->private_copies()) { 4722 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4723 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4724 Data.LastprivateVars.push_back(*IRef); 4725 Data.LastprivateCopies.push_back(IInit); 4726 } 4727 LastprivateDstsOrigs.insert( 4728 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 4729 cast<DeclRefExpr>(*IRef))); 4730 ++IRef; 4731 ++ID; 4732 } 4733 } 4734 SmallVector<const Expr *, 4> LHSs; 4735 SmallVector<const Expr *, 4> RHSs; 4736 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 4737 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 4738 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 4739 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 4740 Data.ReductionOps.append(C->reduction_ops().begin(), 4741 C->reduction_ops().end()); 4742 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 4743 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 4744 } 4745 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 4746 *this, S.getBeginLoc(), LHSs, RHSs, Data); 4747 // Build list of dependences. 4748 buildDependences(S, Data); 4749 // Get list of local vars for untied tasks. 
4750 if (!Data.Tied) { 4751 CheckVarsEscapingUntiedTaskDeclContext Checker; 4752 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); 4753 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), 4754 Checker.getPrivateDecls().end()); 4755 } 4756 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 4757 CapturedRegion](CodeGenFunction &CGF, 4758 PrePostActionTy &Action) { 4759 llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 4760 std::pair<Address, Address>> 4761 UntiedLocalVars; 4762 // Set proper addresses for generated private copies. 4763 OMPPrivateScope Scope(CGF); 4764 // Generate debug info for variables present in shared clause. 4765 if (auto *DI = CGF.getDebugInfo()) { 4766 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = 4767 CGF.CapturedStmtInfo->getCaptureFields(); 4768 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); 4769 if (CaptureFields.size() && ContextValue) { 4770 unsigned CharWidth = CGF.getContext().getCharWidth(); 4771 // The shared variables are packed together as members of structure. 4772 // So the address of each shared variable can be computed by adding 4773 // offset of it (within record) to the base address of record. For each 4774 // shared variable, debug intrinsic llvm.dbg.declare is generated with 4775 // appropriate expressions (DIExpression). 4776 // Ex: 4777 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i 4778 // call void @llvm.dbg.declare(metadata %struct.anon* %12, 4779 // metadata !svar1, 4780 // metadata !DIExpression(DW_OP_deref)) 4781 // call void @llvm.dbg.declare(metadata %struct.anon* %12, 4782 // metadata !svar2, 4783 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) 4784 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { 4785 const VarDecl *SharedVar = It->first; 4786 RecordDecl *CaptureRecord = It->second->getParent(); 4787 const ASTRecordLayout &Layout = 4788 CGF.getContext().getASTRecordLayout(CaptureRecord); 4789 unsigned Offset = 4790 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; 4791 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) 4792 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, 4793 CGF.Builder, false); 4794 // Get the call dbg.declare instruction we just created and update 4795 // its DIExpression to add offset to base address. 4796 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare, 4797 unsigned Offset) { 4798 SmallVector<uint64_t, 8> Ops; 4799 // Add offset to the base address if non zero. 4800 if (Offset) { 4801 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); 4802 Ops.push_back(Offset); 4803 } 4804 Ops.push_back(llvm::dwarf::DW_OP_deref); 4805 Declare->setExpression(llvm::DIExpression::get(Ctx, Ops)); 4806 }; 4807 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); 4808 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) 4809 UpdateExpr(DDI->getContext(), DDI, Offset); 4810 // If we're emitting using the new debug info format into a block 4811 // without a terminator, the record will be "trailing". 
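// Two debug-info encodings are handled here (sketch): in intrinsic mode
// the declare we just emitted is the trailing instruction, e.g.
//   call void @llvm.dbg.declare(metadata ptr %12, metadata !svar, ...)
// whereas in debug-record mode it is a DbgVariableRecord hanging off the
// block's trailing marker, so the DIExpression update must cover both.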
4812 assert(!Last.isTerminator() && "unexpected terminator"); 4813 if (auto *Marker = 4814 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) { 4815 for (llvm::DbgVariableRecord &DVR : llvm::reverse( 4816 llvm::filterDbgVars(Marker->getDbgRecordRange()))) { 4817 UpdateExpr(Last.getContext(), &DVR, Offset); 4818 break; 4819 } 4820 } 4821 } 4822 } 4823 } 4824 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; 4825 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 4826 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { 4827 enum { PrivatesParam = 2, CopyFnParam = 3 }; 4828 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 4829 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 4830 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 4831 CS->getCapturedDecl()->getParam(PrivatesParam))); 4832 // Map privates. 4833 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 4834 llvm::SmallVector<llvm::Value *, 16> CallArgs; 4835 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 4836 CallArgs.push_back(PrivatesPtr); 4837 ParamTypes.push_back(PrivatesPtr->getType()); 4838 for (const Expr *E : Data.PrivateVars) { 4839 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4840 RawAddress PrivatePtr = CGF.CreateMemTemp( 4841 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 4842 PrivatePtrs.emplace_back(VD, PrivatePtr); 4843 CallArgs.push_back(PrivatePtr.getPointer()); 4844 ParamTypes.push_back(PrivatePtr.getType()); 4845 } 4846 for (const Expr *E : Data.FirstprivateVars) { 4847 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4848 RawAddress PrivatePtr = 4849 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4850 ".firstpriv.ptr.addr"); 4851 PrivatePtrs.emplace_back(VD, PrivatePtr); 4852 FirstprivatePtrs.emplace_back(VD, PrivatePtr); 4853 CallArgs.push_back(PrivatePtr.getPointer()); 4854 ParamTypes.push_back(PrivatePtr.getType()); 4855 } 4856 for (const Expr *E : Data.LastprivateVars) { 4857 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4858 RawAddress PrivatePtr = 4859 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4860 ".lastpriv.ptr.addr"); 4861 PrivatePtrs.emplace_back(VD, PrivatePtr); 4862 CallArgs.push_back(PrivatePtr.getPointer()); 4863 ParamTypes.push_back(PrivatePtr.getType()); 4864 } 4865 for (const VarDecl *VD : Data.PrivateLocals) { 4866 QualType Ty = VD->getType().getNonReferenceType(); 4867 if (VD->getType()->isLValueReferenceType()) 4868 Ty = CGF.getContext().getPointerType(Ty); 4869 if (isAllocatableDecl(VD)) 4870 Ty = CGF.getContext().getPointerType(Ty); 4871 RawAddress PrivatePtr = CGF.CreateMemTemp( 4872 CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); 4873 auto Result = UntiedLocalVars.insert( 4874 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); 4875 // If key exists update in place. 
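// Note: llvm::MapVector::insert mirrors std::map::insert and returns an
// (iterator, bool) pair whose bool is false when the key already existed;
// in that case the existing entry is overwritten below.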
4876 if (!Result.second)
4877 *Result.first = std::make_pair(
4878 VD, std::make_pair(PrivatePtr, Address::invalid()));
4879 CallArgs.push_back(PrivatePtr.getPointer());
4880 ParamTypes.push_back(PrivatePtr.getType());
4881 }
4882 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4883 ParamTypes, /*isVarArg=*/false);
4884 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4885 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4886 for (const auto &Pair : LastprivateDstsOrigs) {
4887 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4888 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4889 /*RefersToEnclosingVariableOrCapture=*/
4890 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4891 Pair.second->getType(), VK_LValue,
4892 Pair.second->getExprLoc());
4893 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
4894 }
4895 for (const auto &Pair : PrivatePtrs) {
4896 Address Replacement = Address(
4897 CGF.Builder.CreateLoad(Pair.second),
4898 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4899 CGF.getContext().getDeclAlign(Pair.first));
4900 Scope.addPrivate(Pair.first, Replacement);
4901 if (auto *DI = CGF.getDebugInfo())
4902 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4903 (void)DI->EmitDeclareOfAutoVariable(
4904 Pair.first, Pair.second.getBasePointer(), CGF.Builder,
4905 /*UsePointerValue*/ true);
4906 }
4907 // Adjust mapping for internal locals by mapping actual memory instead of
4908 // a pointer to this memory.
4909 for (auto &Pair : UntiedLocalVars) {
4910 QualType VDType = Pair.first->getType().getNonReferenceType();
4911 if (Pair.first->getType()->isLValueReferenceType())
4912 VDType = CGF.getContext().getPointerType(VDType);
4913 if (isAllocatableDecl(Pair.first)) {
4914 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4915 Address Replacement(
4916 Ptr,
4917 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4918 CGF.getPointerAlign());
4919 Pair.second.first = Replacement;
4920 Ptr = CGF.Builder.CreateLoad(Replacement);
4921 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4922 CGF.getContext().getDeclAlign(Pair.first));
4923 Pair.second.second = Replacement;
4924 } else {
4925 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4926 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4927 CGF.getContext().getDeclAlign(Pair.first));
4928 Pair.second.first = Replacement;
4929 }
4930 }
4931 }
4932 if (Data.Reductions) {
4933 OMPPrivateScope FirstprivateScope(CGF);
4934 for (const auto &Pair : FirstprivatePtrs) {
4935 Address Replacement(
4936 CGF.Builder.CreateLoad(Pair.second),
4937 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4938 CGF.getContext().getDeclAlign(Pair.first));
4939 FirstprivateScope.addPrivate(Pair.first, Replacement);
4940 }
4941 (void)FirstprivateScope.Privatize();
4942 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4943 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4944 Data.ReductionCopies, Data.ReductionOps);
4945 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4946 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4947 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4948 RedCG.emitSharedOrigLValue(CGF, Cnt);
4949 RedCG.emitAggregateType(CGF, Cnt);
4950 // FIXME: This must be removed once the runtime library is fixed.
4951 // Emit required threadprivate variables for
4952 // initializer/combiner/finalizer.
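// The lookup that follows implements the task-reduction protocol (a
// sketch): the reduction descriptor was stashed in the task frame (here
// parameter 9 of the outlined task entry), and getTaskReductionItem
// performs a __kmpc_task_reduction_get_th_data-style query that yields
// this thread's private copy, which is then cast back to the reduction
// item's type.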
4953 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4954 RedCG, Cnt);
4955 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4956 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4957 Replacement = Address(
4958 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
4959 CGF.getContext().VoidPtrTy,
4960 CGF.getContext().getPointerType(
4961 Data.ReductionCopies[Cnt]->getType()),
4962 Data.ReductionCopies[Cnt]->getExprLoc()),
4963 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4964 Replacement.getAlignment());
4965 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4966 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4967 }
4968 }
4969 // Privatize all private variables except for in_reduction items.
4970 (void)Scope.Privatize();
4971 SmallVector<const Expr *, 4> InRedVars;
4972 SmallVector<const Expr *, 4> InRedPrivs;
4973 SmallVector<const Expr *, 4> InRedOps;
4974 SmallVector<const Expr *, 4> TaskgroupDescriptors;
4975 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4976 auto IPriv = C->privates().begin();
4977 auto IRed = C->reduction_ops().begin();
4978 auto ITD = C->taskgroup_descriptors().begin();
4979 for (const Expr *Ref : C->varlists()) {
4980 InRedVars.emplace_back(Ref);
4981 InRedPrivs.emplace_back(*IPriv);
4982 InRedOps.emplace_back(*IRed);
4983 TaskgroupDescriptors.emplace_back(*ITD);
4984 std::advance(IPriv, 1);
4985 std::advance(IRed, 1);
4986 std::advance(ITD, 1);
4987 }
4988 }
4989 // Privatize in_reduction items here, because taskgroup descriptors must be
4990 // privatized earlier.
4991 OMPPrivateScope InRedScope(CGF);
4992 if (!InRedVars.empty()) {
4993 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4994 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4995 RedCG.emitSharedOrigLValue(CGF, Cnt);
4996 RedCG.emitAggregateType(CGF, Cnt);
4997 // The taskgroup descriptor variable is always implicit firstprivate and
4998 // privatized already during processing of the firstprivates.
4999 // FIXME: This must be removed once the runtime library is fixed.
5000 // Emit required threadprivate variables for
5001 // initializer/combiner/finalizer.
5002 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5003 RedCG, Cnt); 5004 llvm::Value *ReductionsPtr; 5005 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5006 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), 5007 TRExpr->getExprLoc()); 5008 } else { 5009 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5010 } 5011 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5012 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5013 Replacement = Address( 5014 CGF.EmitScalarConversion( 5015 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy, 5016 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5017 InRedPrivs[Cnt]->getExprLoc()), 5018 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5019 Replacement.getAlignment()); 5020 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5021 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5022 } 5023 } 5024 (void)InRedScope.Privatize(); 5025 5026 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, 5027 UntiedLocalVars); 5028 Action.Enter(CGF); 5029 BodyGen(CGF); 5030 }; 5031 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5032 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 5033 Data.NumberOfParts); 5034 OMPLexicalScope Scope(*this, S, std::nullopt, 5035 !isOpenMPParallelDirective(S.getDirectiveKind()) && 5036 !isOpenMPSimdDirective(S.getDirectiveKind())); 5037 TaskGen(*this, OutlinedFn, Data); 5038 } 5039 5040 static ImplicitParamDecl * 5041 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 5042 QualType Ty, CapturedDecl *CD, 5043 SourceLocation Loc) { 5044 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 5045 ImplicitParamKind::Other); 5046 auto *OrigRef = DeclRefExpr::Create( 5047 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 5048 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5049 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 5050 ImplicitParamKind::Other); 5051 auto *PrivateRef = DeclRefExpr::Create( 5052 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 5053 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5054 QualType ElemType = C.getBaseElementType(Ty); 5055 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 5056 ImplicitParamKind::Other); 5057 auto *InitRef = DeclRefExpr::Create( 5058 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 5059 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 5060 PrivateVD->setInitStyle(VarDecl::CInit); 5061 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 5062 InitRef, /*BasePath=*/nullptr, 5063 VK_PRValue, FPOptionsOverride())); 5064 Data.FirstprivateVars.emplace_back(OrigRef); 5065 Data.FirstprivateCopies.emplace_back(PrivateRef); 5066 Data.FirstprivateInits.emplace_back(InitRef); 5067 return OrigVD; 5068 } 5069 5070 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 5071 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 5072 OMPTargetDataInfo &InputInfo) { 5073 // Emit outlined function for task construct. 
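// Shape of the codegen below: the target region is wrapped in an implicit
// task (captured as OMPD_task), and the offload arrays (base pointers,
// pointers, sizes and, if a user-defined mapper is present, mappers) are
// forwarded into that task as implicit firstprivate constant arrays, so a
// deferred task can still launch the region with the correct mappings.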
5074 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5075 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5076 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5077 auto I = CS->getCapturedDecl()->param_begin(); 5078 auto PartId = std::next(I); 5079 auto TaskT = std::next(I, 4); 5080 OMPTaskDataTy Data; 5081 // The task is not final. 5082 Data.Final.setInt(/*IntVal=*/false); 5083 // Get list of firstprivate variables. 5084 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 5085 auto IRef = C->varlist_begin(); 5086 auto IElemInitRef = C->inits().begin(); 5087 for (auto *IInit : C->private_copies()) { 5088 Data.FirstprivateVars.push_back(*IRef); 5089 Data.FirstprivateCopies.push_back(IInit); 5090 Data.FirstprivateInits.push_back(*IElemInitRef); 5091 ++IRef; 5092 ++IElemInitRef; 5093 } 5094 } 5095 SmallVector<const Expr *, 4> LHSs; 5096 SmallVector<const Expr *, 4> RHSs; 5097 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 5098 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5099 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5100 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5101 Data.ReductionOps.append(C->reduction_ops().begin(), 5102 C->reduction_ops().end()); 5103 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5104 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5105 } 5106 OMPPrivateScope TargetScope(*this); 5107 VarDecl *BPVD = nullptr; 5108 VarDecl *PVD = nullptr; 5109 VarDecl *SVD = nullptr; 5110 VarDecl *MVD = nullptr; 5111 if (InputInfo.NumberOfTargetItems > 0) { 5112 auto *CD = CapturedDecl::Create( 5113 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 5114 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 5115 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( 5116 getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal, 5117 /*IndexTypeQuals=*/0); 5118 BPVD = createImplicitFirstprivateForType( 5119 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5120 PVD = createImplicitFirstprivateForType( 5121 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5122 QualType SizesType = getContext().getConstantArrayType( 5123 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 5124 ArrSize, nullptr, ArraySizeModifier::Normal, 5125 /*IndexTypeQuals=*/0); 5126 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 5127 S.getBeginLoc()); 5128 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray); 5129 TargetScope.addPrivate(PVD, InputInfo.PointersArray); 5130 TargetScope.addPrivate(SVD, InputInfo.SizesArray); 5131 // If there is no user-defined mapper, the mapper array will be nullptr. In 5132 // this case, we don't need to privatize it. 5133 if (!isa_and_nonnull<llvm::ConstantPointerNull>( 5134 InputInfo.MappersArray.emitRawPointer(*this))) { 5135 MVD = createImplicitFirstprivateForType( 5136 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5137 TargetScope.addPrivate(MVD, InputInfo.MappersArray); 5138 } 5139 } 5140 (void)TargetScope.Privatize(); 5141 buildDependences(S, Data); 5142 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, 5143 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 5144 // Set proper addresses for generated private copies. 
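// The outlined task entry receives a pointer to the privates block and a
// copy helper (parameters 2 and 3 below). Conceptually the helper has a
// hypothetical signature like
//   void .omp.copy_fn(void *privs, T1 **p1, T2 **p2, ...);
// it fills one out-pointer per privatized variable, and the loads issued
// after the call rebind each declaration to its private copy.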
5145 OMPPrivateScope Scope(CGF); 5146 if (!Data.FirstprivateVars.empty()) { 5147 enum { PrivatesParam = 2, CopyFnParam = 3 }; 5148 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 5149 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 5150 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 5151 CS->getCapturedDecl()->getParam(PrivatesParam))); 5152 // Map privates. 5153 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 5154 llvm::SmallVector<llvm::Value *, 16> CallArgs; 5155 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 5156 CallArgs.push_back(PrivatesPtr); 5157 ParamTypes.push_back(PrivatesPtr->getType()); 5158 for (const Expr *E : Data.FirstprivateVars) { 5159 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5160 RawAddress PrivatePtr = 5161 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 5162 ".firstpriv.ptr.addr"); 5163 PrivatePtrs.emplace_back(VD, PrivatePtr); 5164 CallArgs.push_back(PrivatePtr.getPointer()); 5165 ParamTypes.push_back(PrivatePtr.getType()); 5166 } 5167 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), 5168 ParamTypes, /*isVarArg=*/false); 5169 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 5170 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 5171 for (const auto &Pair : PrivatePtrs) { 5172 Address Replacement( 5173 CGF.Builder.CreateLoad(Pair.second), 5174 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), 5175 CGF.getContext().getDeclAlign(Pair.first)); 5176 Scope.addPrivate(Pair.first, Replacement); 5177 } 5178 } 5179 CGF.processInReduction(S, Data, CGF, CS, Scope); 5180 if (InputInfo.NumberOfTargetItems > 0) { 5181 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 5182 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 5183 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 5184 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 5185 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 5186 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 5187 // If MVD is nullptr, the mapper array is not privatized 5188 if (MVD) 5189 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( 5190 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); 5191 } 5192 5193 Action.Enter(CGF); 5194 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 5195 auto *TL = S.getSingleClause<OMPThreadLimitClause>(); 5196 if (CGF.CGM.getLangOpts().OpenMP >= 51 && 5197 needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) { 5198 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task 5199 // enclosing this target region. This will indirectly set the thread_limit 5200 // for every applicable construct within target region. 5201 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( 5202 CGF, TL->getThreadLimit(), S.getBeginLoc()); 5203 } 5204 BodyGen(CGF); 5205 }; 5206 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5207 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 5208 Data.NumberOfParts); 5209 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 
1 : 0);
5210 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5211 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5212 SourceLocation());
5213 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5214 SharedsTy, CapturedStruct, &IfCond, Data);
5215 }
5216 
5217 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5218 OMPTaskDataTy &Data,
5219 CodeGenFunction &CGF,
5220 const CapturedStmt *CS,
5221 OMPPrivateScope &Scope) {
5222 if (Data.Reductions) {
5223 OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5224 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5225 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5226 Data.ReductionCopies, Data.ReductionOps);
5227 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5228 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5229 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5230 RedCG.emitSharedOrigLValue(CGF, Cnt);
5231 RedCG.emitAggregateType(CGF, Cnt);
5232 // FIXME: This must be removed once the runtime library is fixed.
5233 // Emit required threadprivate variables for
5234 // initializer/combiner/finalizer.
5235 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5236 RedCG, Cnt);
5237 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5238 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5239 Replacement = Address(
5240 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5241 CGF.getContext().VoidPtrTy,
5242 CGF.getContext().getPointerType(
5243 Data.ReductionCopies[Cnt]->getType()),
5244 Data.ReductionCopies[Cnt]->getExprLoc()),
5245 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5246 Replacement.getAlignment());
5247 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5248 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5249 }
5250 }
5251 (void)Scope.Privatize();
5252 SmallVector<const Expr *, 4> InRedVars;
5253 SmallVector<const Expr *, 4> InRedPrivs;
5254 SmallVector<const Expr *, 4> InRedOps;
5255 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5256 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5257 auto IPriv = C->privates().begin();
5258 auto IRed = C->reduction_ops().begin();
5259 auto ITD = C->taskgroup_descriptors().begin();
5260 for (const Expr *Ref : C->varlists()) {
5261 InRedVars.emplace_back(Ref);
5262 InRedPrivs.emplace_back(*IPriv);
5263 InRedOps.emplace_back(*IRed);
5264 TaskgroupDescriptors.emplace_back(*ITD);
5265 std::advance(IPriv, 1);
5266 std::advance(IRed, 1);
5267 std::advance(ITD, 1);
5268 }
5269 }
5270 OMPPrivateScope InRedScope(CGF);
5271 if (!InRedVars.empty()) {
5272 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5273 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5274 RedCG.emitSharedOrigLValue(CGF, Cnt);
5275 RedCG.emitAggregateType(CGF, Cnt);
5276 // FIXME: This must be removed once the runtime library is fixed.
5277 // Emit required threadprivate variables for
5278 // initializer/combiner/finalizer.
5279 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5280 RedCG, Cnt); 5281 llvm::Value *ReductionsPtr; 5282 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5283 ReductionsPtr = 5284 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc()); 5285 } else { 5286 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5287 } 5288 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5289 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5290 Replacement = Address( 5291 CGF.EmitScalarConversion( 5292 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy, 5293 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5294 InRedPrivs[Cnt]->getExprLoc()), 5295 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5296 Replacement.getAlignment()); 5297 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5298 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5299 } 5300 } 5301 (void)InRedScope.Privatize(); 5302 } 5303 5304 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 5305 // Emit outlined function for task construct. 5306 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5307 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5308 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5309 const Expr *IfCond = nullptr; 5310 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 5311 if (C->getNameModifier() == OMPD_unknown || 5312 C->getNameModifier() == OMPD_task) { 5313 IfCond = C->getCondition(); 5314 break; 5315 } 5316 } 5317 5318 OMPTaskDataTy Data; 5319 // Check if we should emit tied or untied task. 5320 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 5321 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 5322 CGF.EmitStmt(CS->getCapturedStmt()); 5323 }; 5324 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 5325 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 5326 const OMPTaskDataTy &Data) { 5327 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 5328 SharedsTy, CapturedStruct, IfCond, 5329 Data); 5330 }; 5331 auto LPCRegion = 5332 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5333 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 5334 } 5335 5336 void CodeGenFunction::EmitOMPTaskyieldDirective( 5337 const OMPTaskyieldDirective &S) { 5338 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 5339 } 5340 5341 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { 5342 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); 5343 Expr *ME = MC ? 
MC->getMessageString() : nullptr;
5344 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5345 bool IsFatal = false;
5346 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5347 IsFatal = true;
5348 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5349 }
5350 
5351 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5352 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5353 }
5354 
5355 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5356 OMPTaskDataTy Data;
5357 // Build list of dependences.
5358 buildDependences(S, Data);
5359 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5360 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5361 }
5362 
5363 static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5364 return T.clauses().empty();
5365 }
5366 
5367 void CodeGenFunction::EmitOMPTaskgroupDirective(
5368 const OMPTaskgroupDirective &S) {
5369 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5370 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
5371 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5372 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5373 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5374 AllocaInsertPt->getIterator());
5375 
5376 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5377 InsertPointTy CodeGenIP) {
5378 Builder.restoreIP(CodeGenIP);
5379 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5380 };
5381 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5382 if (!CapturedStmtInfo)
5383 CapturedStmtInfo = &CapStmtInfo;
5384 Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
5385 return;
5386 }
5387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5388 Action.Enter(CGF);
5389 if (const Expr *E = S.getReductionRef()) {
5390 SmallVector<const Expr *, 4> LHSs;
5391 SmallVector<const Expr *, 4> RHSs;
5392 OMPTaskDataTy Data;
5393 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5394 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5395 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5396 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5397 Data.ReductionOps.append(C->reduction_ops().begin(),
5398 C->reduction_ops().end());
5399 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5400 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5401 }
5402 llvm::Value *ReductionDesc =
5403 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5404 LHSs, RHSs, Data);
5405 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5406 CGF.EmitVarDecl(*VD);
5407 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5408 /*Volatile=*/false, E->getType());
5409 }
5410 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5411 };
5412 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5413 }
5414 
5415 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5416 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5417 ?
llvm::AtomicOrdering::NotAtomic 5418 : llvm::AtomicOrdering::AcquireRelease; 5419 CGM.getOpenMPRuntime().emitFlush( 5420 *this, 5421 [&S]() -> ArrayRef<const Expr *> { 5422 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) 5423 return llvm::ArrayRef(FlushClause->varlist_begin(), 5424 FlushClause->varlist_end()); 5425 return std::nullopt; 5426 }(), 5427 S.getBeginLoc(), AO); 5428 } 5429 5430 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { 5431 const auto *DO = S.getSingleClause<OMPDepobjClause>(); 5432 LValue DOLVal = EmitLValue(DO->getDepobj()); 5433 if (const auto *DC = S.getSingleClause<OMPDependClause>()) { 5434 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), 5435 DC->getModifier()); 5436 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); 5437 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( 5438 *this, Dependencies, DC->getBeginLoc()); 5439 EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal); 5440 return; 5441 } 5442 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { 5443 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); 5444 return; 5445 } 5446 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { 5447 CGM.getOpenMPRuntime().emitUpdateClause( 5448 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); 5449 return; 5450 } 5451 } 5452 5453 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { 5454 if (!OMPParentLoopDirectiveForScan) 5455 return; 5456 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; 5457 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); 5458 SmallVector<const Expr *, 4> Shareds; 5459 SmallVector<const Expr *, 4> Privates; 5460 SmallVector<const Expr *, 4> LHSs; 5461 SmallVector<const Expr *, 4> RHSs; 5462 SmallVector<const Expr *, 4> ReductionOps; 5463 SmallVector<const Expr *, 4> CopyOps; 5464 SmallVector<const Expr *, 4> CopyArrayTemps; 5465 SmallVector<const Expr *, 4> CopyArrayElems; 5466 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { 5467 if (C->getModifier() != OMPC_REDUCTION_inscan) 5468 continue; 5469 Shareds.append(C->varlist_begin(), C->varlist_end()); 5470 Privates.append(C->privates().begin(), C->privates().end()); 5471 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5472 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5473 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 5474 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); 5475 CopyArrayTemps.append(C->copy_array_temps().begin(), 5476 C->copy_array_temps().end()); 5477 CopyArrayElems.append(C->copy_array_elems().begin(), 5478 C->copy_array_elems().end()); 5479 } 5480 if (ParentDir.getDirectiveKind() == OMPD_simd || 5481 (getLangOpts().OpenMPSimd && 5482 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { 5483 // For simd directive and simd-based directives in simd only mode, use the 5484 // following codegen: 5485 // int x = 0; 5486 // #pragma omp simd reduction(inscan, +: x) 5487 // for (..) { 5488 // <first part> 5489 // #pragma omp scan inclusive(x) 5490 // <second part> 5491 // } 5492 // is transformed to: 5493 // int x = 0; 5494 // for (..) { 5495 // int x_priv = 0; 5496 // <first part> 5497 // x = x_priv + x; 5498 // x_priv = x; 5499 // <second part> 5500 // } 5501 // and 5502 // int x = 0; 5503 // #pragma omp simd reduction(inscan, +: x) 5504 // for (..) 
{ 5505 // <first part> 5506 // #pragma omp scan exclusive(x) 5507 // <second part> 5508 // } 5509 // to 5510 // int x = 0; 5511 // for (..) { 5512 // int x_priv = 0; 5513 // <second part> 5514 // int temp = x; 5515 // x = x_priv + x; 5516 // x_priv = temp; 5517 // <first part> 5518 // } 5519 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); 5520 EmitBranch(IsInclusive 5521 ? OMPScanReduce 5522 : BreakContinueStack.back().ContinueBlock.getBlock()); 5523 EmitBlock(OMPScanDispatch); 5524 { 5525 // New scope for correct construction/destruction of temp variables for 5526 // exclusive scan. 5527 LexicalScope Scope(*this, S.getSourceRange()); 5528 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); 5529 EmitBlock(OMPScanReduce); 5530 if (!IsInclusive) { 5531 // Create temp var and copy LHS value to this temp value. 5532 // TMP = LHS; 5533 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5534 const Expr *PrivateExpr = Privates[I]; 5535 const Expr *TempExpr = CopyArrayTemps[I]; 5536 EmitAutoVarDecl( 5537 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); 5538 LValue DestLVal = EmitLValue(TempExpr); 5539 LValue SrcLVal = EmitLValue(LHSs[I]); 5540 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(), 5541 SrcLVal.getAddress(), 5542 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5543 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), 5544 CopyOps[I]); 5545 } 5546 } 5547 CGM.getOpenMPRuntime().emitReduction( 5548 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 5549 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); 5550 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5551 const Expr *PrivateExpr = Privates[I]; 5552 LValue DestLVal; 5553 LValue SrcLVal; 5554 if (IsInclusive) { 5555 DestLVal = EmitLValue(RHSs[I]); 5556 SrcLVal = EmitLValue(LHSs[I]); 5557 } else { 5558 const Expr *TempExpr = CopyArrayTemps[I]; 5559 DestLVal = EmitLValue(RHSs[I]); 5560 SrcLVal = EmitLValue(TempExpr); 5561 } 5562 EmitOMPCopy( 5563 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 5564 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5565 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 5566 } 5567 } 5568 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); 5569 OMPScanExitBlock = IsInclusive 5570 ? BreakContinueStack.back().ContinueBlock.getBlock() 5571 : OMPScanReduce; 5572 EmitBlock(OMPAfterScanBlock); 5573 return; 5574 } 5575 if (!IsInclusive) { 5576 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5577 EmitBlock(OMPScanExitBlock); 5578 } 5579 if (OMPFirstScanLoop) { 5580 // Emit buffer[i] = red; at the end of the input phase. 
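// Sketch of the two-pass scheme used on this (non-simd) path: the loop is
// emitted twice. The first ("input") pass stores each iteration's private
// reduction value into a temporary buffer, conceptually
//   buffer[i] = x_priv;
// a reduction step between the passes combines the buffer into prefix
// values, and the second ("scan") pass reloads buffer[i] (or buffer[i-1]
// for exclusive scan) before executing the post-scan part of the body.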
5581 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) 5582 .getIterationVariable() 5583 ->IgnoreParenImpCasts(); 5584 LValue IdxLVal = EmitLValue(IVExpr); 5585 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); 5586 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); 5587 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5588 const Expr *PrivateExpr = Privates[I]; 5589 const Expr *OrigExpr = Shareds[I]; 5590 const Expr *CopyArrayElem = CopyArrayElems[I]; 5591 OpaqueValueMapping IdxMapping( 5592 *this, 5593 cast<OpaqueValueExpr>( 5594 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 5595 RValue::get(IdxVal)); 5596 LValue DestLVal = EmitLValue(CopyArrayElem); 5597 LValue SrcLVal = EmitLValue(OrigExpr); 5598 EmitOMPCopy( 5599 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 5600 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5601 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 5602 } 5603 } 5604 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5605 if (IsInclusive) { 5606 EmitBlock(OMPScanExitBlock); 5607 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5608 } 5609 EmitBlock(OMPScanDispatch); 5610 if (!OMPFirstScanLoop) { 5611 // Emit red = buffer[i]; at the entrance to the scan phase. 5612 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) 5613 .getIterationVariable() 5614 ->IgnoreParenImpCasts(); 5615 LValue IdxLVal = EmitLValue(IVExpr); 5616 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); 5617 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); 5618 llvm::BasicBlock *ExclusiveExitBB = nullptr; 5619 if (!IsInclusive) { 5620 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); 5621 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); 5622 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); 5623 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); 5624 EmitBlock(ContBB); 5625 // Use idx - 1 iteration for exclusive scan. 5626 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); 5627 } 5628 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5629 const Expr *PrivateExpr = Privates[I]; 5630 const Expr *OrigExpr = Shareds[I]; 5631 const Expr *CopyArrayElem = CopyArrayElems[I]; 5632 OpaqueValueMapping IdxMapping( 5633 *this, 5634 cast<OpaqueValueExpr>( 5635 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 5636 RValue::get(IdxVal)); 5637 LValue SrcLVal = EmitLValue(CopyArrayElem); 5638 LValue DestLVal = EmitLValue(OrigExpr); 5639 EmitOMPCopy( 5640 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 5641 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5642 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 5643 } 5644 if (!IsInclusive) { 5645 EmitBlock(ExclusiveExitBB); 5646 } 5647 } 5648 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock 5649 : OMPAfterScanBlock); 5650 EmitBlock(OMPAfterScanBlock); 5651 } 5652 5653 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 5654 const CodeGenLoopTy &CodeGenLoop, 5655 Expr *IncExpr) { 5656 // Emit the loop iteration variable. 5657 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 5658 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 5659 EmitVarDecl(*IVDecl); 5660 5661 // Emit the iterations count variable. 
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit the calculation of the iteration count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
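        // E.g., '#pragma omp distribute dist_schedule(static, 128)' takes the
        // branch above and emits '128' (converted to the IV type) as the
        // chunk; with no dist_schedule clause, this branch asks the
        // runtime/target for its default schedule and chunk.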
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned =
          IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
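        // (With the static schedule this is typically lowered to a
        // __kmpc_for_static_fini runtime call, paired with the static init
        // emitted above.)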
        RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

template <typename T>
static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
                          llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                          llvm::OpenMPIRBuilder &OMPBuilder) {

  unsigned NumLoops = C->getNumLoops();
  QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
      /*DestWidth=*/64, /*Signed=*/1);
  llvm::SmallVector<llvm::Value *> StoreValues;
  for (unsigned I = 0; I < NumLoops; I++) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *StoreValue = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    StoreValues.emplace_back(StoreValue);
  }
  OMPDoacrossKind<T> ODK;
  bool IsDependSource = ODK.isSource(C);
  CGF.Builder.restoreIP(
      OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
                                     StoreValues, ".cnt.addr",
                                     IsDependSource));
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn,
                                                      CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(),
                                           !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF,
                                  llvm::AtomicOrdering AO, LValue LVal,
                                  RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF,
                                  llvm::AtomicOrdering AO, const Expr *X,
                                  const Expr *V, SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(),
                         Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are scalar values, the lvalue
  // for 'x' is simple, and atomics are supported for the given type on the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress().getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress().getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress().getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP,
                                         IC, X.getAddress().getElementType());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
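      // A sketch of the emitted pattern (not the exact IR):
      //   old = atomic load x;
      //   do {
      //     desired = <update expr with xrval = old>;
      //   } while (!atomic_compare_exchange(x, &old, desired));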
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval binop expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
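  // That is, release/acq_rel/seq_cst fall through to a release flush below;
  // acquire and relaxed require no flush on entry.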
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr; -> xrval binop expr;
    //  x++, ++x -> xrval + 1;
    //  x--, --x -> xrval - 1;
    //  x = x binop expr; -> xrval binop expr;
    //  x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
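        // E.g., for 'v = ++x;' on an integer 'x', the atomicrmw returns the
        // old value, so the value stored to 'v' is recomputed here by
        // re-evaluating the update expression with 'xrval' bound to that old
        // value.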
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr,
                                                    ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry
    // to the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the
    // atomic operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}

static void emitOMPAtomicCompareExpr(
    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
    SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress();

  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
    EVal = CGF.Builder.CreateIntCast(
        CI, XLVal.getAddress().getElementType(),
        E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
      DVal = CGF.Builder.CreateIntCast(
          CI, XLVal.getAddress().getElementType(),
          D->getType()->hasSignedIntegerRepresentation());

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.emitRawPointer(CGF), XAddr.getElementType(),
      X->getType()->hasSignedIntegerRepresentation(),
      X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(V);
    Address Addr = LV.getAddress();
    VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
              V->getType()->hasSignedIntegerRepresentation(),
              V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(R);
    Address Addr = LV.getAddress();
    ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
              R->getType()->hasSignedIntegerRepresentation(),
              R->getType().isVolatileQualified()};
  }

  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // The fail clause was not specified on the "#pragma omp atomic compare"
    // construct.
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly));
  } else
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, FailAO));
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO,
                              llvm::AtomicOrdering FailAO,
                              bool IsPostfixUpdate, const Expr *X,
                              const Expr *V, const Expr *R, const Expr *E,
                              const Expr *UE, const Expr *D, const Expr *CE,
                              bool IsXLHSInRHSPart, bool IsFailOnly,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_compare: {
    emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
                             IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly,
                             Loc);
    break;
  }
  default:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
  // Fail Memory Clause Ordering.
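  // E.g., '#pragma omp atomic compare fail(acquire)' requests a separate
  // (typically weaker) ordering for the comparison-failure path; NotAtomic
  // below means no fail clause was specified.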
  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    // TODO: codegen for the 'weak' clause is not implemented yet; bail out
    // without emitting anything for now.
    if (K == OMPC_weak)
      return;
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_fail)) {
    Kind = OMPC_compare;
    const auto *FailClause = S.getSingleClause<OMPFailClause>();
    if (FailClause) {
      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
      if (FailParameter == llvm::omp::OMPC_relaxed)
        FailAO = llvm::AtomicOrdering::Monotonic;
      else if (FailParameter == llvm::omp::OMPC_acquire)
        FailAO = llvm::AtomicOrdering::Acquire;
      else if (FailParameter == llvm::omp::OMPC_seq_cst)
        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
                    S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
                    S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
                    S.isFailOnly(), S.getBeginLoc());
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
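  // On the host this outlines the region as an offload kernel and emits the
  // target call; on the device the region body is emitted inline (see the
  // check just below).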
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so, the target region is
  // not an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
    CGM.getDiags().Report(DiagID);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit the calculation of the iteration count.
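      // The trip count is handed to the offload runtime, which may use it,
      // e.g., to size the kernel launch (number of teams/threads).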
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
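  // E.g., '#pragma omp teams num_teams(4)' takes this path: the body below is
  // outlined and launched via the teams runtime entry point, with the
  // num_teams/thread_limit values emitted in emitCommonOMPTeamsDirective.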
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build list and emit dependences.
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(*this);
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
  if (!ItOMPInitClause.empty()) {
    // Look at the multiple init clauses.
    for (const OMPInitClause *C : ItOMPInitClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      llvm::omp::OMPInteropType InteropType =
          llvm::omp::OMPInteropType::Unknown;
      if (C->getIsTarget()) {
        InteropType = llvm::omp::OMPInteropType::Target;
      } else {
        assert(C->getIsTargetSync() &&
               "Expected interop-type target/targetsync");
        InteropType = llvm::omp::OMPInteropType::TargetSync;
      }
      OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
                                      Device, NumDependences, DependenceList,
                                      Data.HasNowaitClause);
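      // E.g., '#pragma omp interop init(targetsync : obj) device(1)' reaches
      // this call with InteropType == TargetSync and the device number and
      // dependence list (if any) computed above.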
    }
  }
  auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
  if (!ItOMPDestroyClause.empty()) {
    // Look at the multiple destroy clauses.
    for (const OMPDestroyClause *C : ItOMPDestroyClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                         NumDependences, DependenceList,
                                         Data.HasNowaitClause);
    }
  }
  auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
  if (!ItOMPUseClause.empty()) {
    // Look at the multiple use clauses.
    for (const OMPUseClause *C : ItOMPUseClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                     NumDependences, DependenceList,
                                     Data.HasNowaitClause);
    }
  }
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
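  // Illustrative sketch of the effect (assumption, for exposition): for a
  // construct such as
  //
  //   #pragma omp target teams distribute parallel for
  //   for (int I = 0; I < N; ++I)
  //     A[I] = B[I];
  //
  // the call below outlines the region into a device kernel and, because
  // IsOffloadEntry is true, records it in the offload entry table so the
  // runtime can look it up and launch it.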
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *OrigVarIt : C.varlists()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
    if (!Processed.insert(OrigVD).second)
      continue;

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
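      // For instance (hypothetical), inside a member function:
      //
      //   #pragma omp target data map(Ptr) use_device_ptr(Ptr)
      //
      // where Ptr is a field, Sema captures this->Ptr in an
      // OMPCapturedExprDecl; the MemberExpr initializer below is unwrapped to
      // recover the FieldDecl that the mapping logic used as its key.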
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    // Register the address of the private variable.
    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD,
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable length arrays we need to load the pointer to
    // get the correct mapping, since the pointer to the data was passed to
    // the runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
                            PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
  }
}

// Generate the instructions for '#pragma omp target data' directive.
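// For example (illustrative only):
//
//   #pragma omp target data map(tofrom : A[0:N]) use_device_ptr(A)
//   {
//     launch_kernel(A, N); // hypothetical call that receives the device ptr
//   }
//
// maps A for the duration of the region and makes references to A inside the
// region use the device pointer produced by the mapping runtime.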
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr/use_device_addr clauses.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
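    // CS->getCapturedStmt() is the user's structured block. For example, in
    // the (illustrative) construct
    //
    //   #pragma omp target parallel reduction(+ : Sum)
    //   { Sum += compute(); }
    //
    // the compound statement is emitted here, with the reduction variable
    // already privatized by the scope above.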
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for simd'.
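  // Illustrative source form (hypothetical):
  //
  //   #pragma omp target parallel for simd
  //   for (int I = 0; I < N; ++I)
  //     A[I] = B[I] * C;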
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop helper variable to the corresponding implicit parameter of the
/// outlined function in the given private scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
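    // E.g. (illustrative) for "#pragma omp taskloop" over
    // "for (int I = 0; I < 0; ++I)", the precondition folds to false and the
    // loop body is not emitted at all.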
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
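    // Illustrative: for "#pragma omp taskloop lastprivate(X)", the task that
    // executes the sequentially last iteration has the IsLastIter flag set by
    // the runtime, and only that task copies its private X back to the
    // original variable.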
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF,
                                            TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // If the loop counters are not declared by the loop init, privatize them
    // before emitting the underlying statement.
    const Stmt *CS =
        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}

void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

#ifndef NDEBUG
static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
                                             std::string StatusMsg,
                                             const OMPExecutableDirective &D) {
  bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
  if (IsDevice)
    StatusMsg += ": DEVICE";
  else
    StatusMsg += ": HOST";
  SourceLocation L = D.getBeginLoc();
  auto &SM = CGF.getContext().getSourceManager();
  PresumedLoc PLoc = SM.getPresumedLoc(L);
  // Use a placeholder when the presumed location is invalid; streaming a null
  // C string into the debug output is not safe.
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : "<unknown>";
  unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
}
#endif

static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
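  // This fallback is taken when canBeParallelFor() is false, e.g.
  // (hypothetically) when the loop body may reach another OpenMP parallel
  // region, so an implicit 'parallel for' cannot be layered safely on top of
  // 'distribute'.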
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target teams loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}

static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Emit target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast so the null check below is meaningful.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}