//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) ||
                (CGF.CapturedStmtInfo &&
                 InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for OpenMP parallel construct, that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct, that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
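      // Illustrative sketch (not emitted verbatim): a range-based loop such as
      //   for (auto &X : Vec) ...
      // is desugared by Sema into roughly
      //   auto &&__range = Vec;
      //   auto __begin = begin-expr, __end = end-expr;
      // so the init, __range and __end parts must be emitted here, before the
      // OpenMP loop itself, to make the loop bounds available.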
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
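    // For reference (illustrative source, not produced by this code): an
    // inscan reduction looks like
    //   #pragma omp simd reduction(inscan, +: Sum)
    //   for (int I = 0; I < N; ++I) {
    //     Sum += A[I];
    //     #pragma omp scan inclusive(Sum)
    //     B[I] = Sum;
    //   }
    // The copy array temporaries exist only for the non-simd lowering, so they
    // are collected below and skipped when privatizing the captures.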
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes can be passed to the outlined
    // function in the same way.
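    // Illustrative sketch of the scheme described above: for
    //   int X = 42;
    //   #pragma omp parallel firstprivate(X)
    // the outlined function receives X as a uintptr-sized argument, roughly
    //   void .omp_outlined.(..., uintptr_t X);
    // and the value is cast back to the original type before it is used.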
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamDecl::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
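    // E.g. (illustrative) for 'int *P' captured by copy, P is already
    // pointer-sized, so it is passed through unchanged and only registered
    // in LocalAddrs below.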
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
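  // When full debug info is requested, two functions are emitted below: a
  // "<helper>_debug__" version with the natural parameter types, plus a
  // wrapper with the uintptr-based runtime signature that forwards to it.
  // A rough sketch of the result (names illustrative):
  //   void <helper>_debug__(..., int X);    // debuggable body
  //   void <helper>(..., uintptr_t X) {     // runtime-facing wrapper
  //     <helper>_debug__(..., (int)X);
  //   }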
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace()),
            PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
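  // The emitted copy loop has roughly this shape (illustrative):
  //   if (DestBegin == DestEnd) goto omp.arraycpy.done;
  //   omp.arraycpy.body:
  //     Elem = phi [Begin, entry], [ElemNext, body]
  //     <CopyGen(DestElem, SrcElem)>
  //     ElemNext = Elem + 1
  //     if (ElemNext == DestEnd) goto omp.arraycpy.done;
  //     goto omp.arraycpy.body;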
  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
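  // At this point the emitted code has roughly this shape (illustrative),
  // based on the copyin.not.master blocks created above:
  //   if (&master_tp_var != &thread_tp_var) {  // copyin.not.master
  //     thread_tp_var = master_tp_var;
  //     ...
  //   }                                        // copyin.not.master.end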
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
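    // E.g. (illustrative source that triggers this path):
    //   #pragma omp for lastprivate(conditional: X)
    //   for (int I = 0; I < N; ++I)
    //     if (A[I]) X = I;
    // The barrier ensures the conditional stores from all threads are visible
    // before the final value of X is copied back to the original variable.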
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(*this),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
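    // The LHS/RHS placeholder variables registered below stand for the
    // original (shared) and private copies inside the generated combiner
    // expression, which has roughly the form 'LHS = LHS <op> RHS'. E.g.
    // (illustrative):
    //   #pragma omp parallel for reduction(+: A[0:N])
    // privatizes the N-element section, and the addresses registered here let
    // the combiner refer to the right storage for each case below.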
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
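    // E.g. (illustrative): for
    //   #pragma omp for reduction(+: Sum) nowait
    // the runtime reduction is emitted without the trailing barrier; for
    //   #pragma omp simd reduction(+: Sum)
    // SimpleReduction is true and the reduction is expanded inline instead of
    // going through the runtime.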
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return value but may be passed by reference - no need
  // to check for updated lastprivate conditional.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

static void emitOMPCopyinClause(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S) {
  bool Copyins = CGF.EmitOMPCopyinClause(S);
  if (Copyins) {
    // Emit an implicit barrier to synchronize the threads and avoid data
    // races when propagating the primary thread's values of threadprivate
    // variables to the local instances of those variables in all other
    // implicit threads.
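    // For example, assuming a threadprivate variable 'tp':
    //   #pragma omp parallel copyin(tp)
    // the primary thread's value of 'tp' must be copied into every other
    // thread's instance before any of them may read it, hence the forced
    // barrier below.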
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  }
}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}

Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}

std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator,
    StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given
    // location, thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
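      // For example, for a region like
      //   #pragma omp parallel shared(a)
      // the outlined body keeps operating on the original 'a', so the
      // incoming value is returned unchanged as the replacement.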
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}

namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

  OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
      delete;
  OMPTransformDirectiveScopeRAII &
  operator=(const OMPTransformDirectiveScopeRAII &) = delete;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace

static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in
    // the codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
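  // For example, for a collapsed nest such as
  //   #pragma omp for collapse(2)
  //   for (int i = 0; i < n; ++i)
  //     for (int j = 0; j < m; ++j) body(i, j);
  // emitBody walks two loop levels and emits only 'body(i, j)'; the counters
  // were already recomputed from the collapsed iteration variable above.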
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}

/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
                     llvm::ArrayRef<llvm::Value *> Args) {
  // Append the closure context to the argument.
  SmallVector<llvm::Value *> EffectiveArgs;
  EffectiveArgs.reserve(Args.size() + 1);
  llvm::append_range(EffectiveArgs, Args);
  EffectiveArgs.push_back(Cap.second);

  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
}

llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive containing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}

void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress(*this);
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.getPointer(), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}

void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
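  // For example, a loop annotation such as
  //   #pragma clang loop vectorize_width(4)
  // on the associated loop may reach this point as an AttributedStmt wrapping
  // the captured statement; its attributes are then forwarded to LoopStack.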
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}

void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
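  // For example, assuming
  //   #pragma omp simd linear(i : 2)
  // the final expression stores 'start + number-of-iterations * 2' back into
  // the original 'i' once the loop has finished.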
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
      }
    }
  }
}

void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
    } else {
      (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid
      // re-emission of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            VD, CreateMemTemp(DRE->getType(), VD->getName()));
      }
    }
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that the loop is executed at least once.
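  // For example, for 'for (int i = 0; i < n; ++i)' the emitted precondition
  // is roughly '0 < n', branching to TrueBlock only if at least one
  // iteration will execute.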
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        // Emit private VarDecl with copy init.
        EmitVarDecl(*PrivateVD);
        bool IsRegistered =
            PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}

void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  if ((D.getDirectiveKind() == OMPD_simd ||
       (getLangOpts().OpenMPSimd &&
        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
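    // For example, for 'for (i = lb; i < ub; i += st)' the precomputed count
    // is roughly '(ub - lb + st - 1) / st', clamped to the iteration type.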
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  }
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
  // Check for unsupported clauses.
  for (OMPClause *C : S.clauses()) {
    // Currently only order, simdlen, safelen and aligned clauses are
    // supported.
    if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
          isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
      return false;
  }

  // Check if we have a statement with the ordered directive.
  // Visit the statement hierarchy to find a compound statement
  // with an ordered directive in it.
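  // For example, a construct such as
  //   #pragma omp simd
  //   for (...) { ... #pragma omp ordered simd ... }
  // is rejected here and handled by the classic codegen path instead.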
  if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
    if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
      for (const Stmt *SubStmt : SyntacticalLoop->children()) {
        if (!SubStmt)
          continue;
        if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
          for (const Stmt *CSSubStmt : CS->children()) {
            if (!CSSubStmt)
              continue;
            if (isa<OMPOrderedDirective>(CSSubStmt)) {
              return false;
            }
          }
        }
      }
    }
  }
  return true;
}

static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
    }
  }
  return AlignedVars;
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
            GetAlignedMapping(S, CGF);
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata.
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          RValue Len =
              this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Simdlen = Val;
        }
        llvm::ConstantInt *Safelen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
          RValue Len =
              this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Safelen = Val;
        }
        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
          if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
          }
        }
        // Add simd metadata to the collapsed loop. Do not generate
        // another loop for the if clause; support for the if clause is done
        // earlier.
        OMPBuilder.applySimd(CLI, AlignedVars,
                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
                                                  CodeGenIRBuilder);
    }
    return;
  }

  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
  OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume the nested loop. Clear the entire remaining loop stack because
    // a fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
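    // For example, '#pragma omp unroll partial(4)' keeps a residual outer
    // loop that an enclosing loop-associated directive may still consume,
    // whereas '#pragma omp unroll full' leaves nothing to transform further.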
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will
  // have used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop-sharing constructs (e.g.
    // 'distribute parallel for').
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // When 'distribute' is not combined with a 'for':
        //   while (idx <= UB) { BODY; ++idx; }
        // When 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for'):
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
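  // For statically scheduled loops this ends up calling the runtime's
  // static-loop finalization entry point (__kmpc_for_static_fini in libomp);
  // dynamically scheduled loops are finished by the dispatch protocol itself.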
2917 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 2918 if (!DynamicOrOrdered) 2919 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2920 S.getDirectiveKind()); 2921 }; 2922 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2923 } 2924 2925 void CodeGenFunction::EmitOMPForOuterLoop( 2926 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 2927 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 2928 const OMPLoopArguments &LoopArgs, 2929 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2930 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2931 2932 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 2933 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); 2934 2935 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, 2936 LoopArgs.Chunk != nullptr)) && 2937 "static non-chunked schedule does not need outer loop"); 2938 2939 // Emit outer loop. 2940 // 2941 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2942 // When schedule(dynamic,chunk_size) is specified, the iterations are 2943 // distributed to threads in the team in chunks as the threads request them. 2944 // Each thread executes a chunk of iterations, then requests another chunk, 2945 // until no chunks remain to be distributed. Each chunk contains chunk_size 2946 // iterations, except for the last chunk to be distributed, which may have 2947 // fewer iterations. When no chunk_size is specified, it defaults to 1. 2948 // 2949 // When schedule(guided,chunk_size) is specified, the iterations are assigned 2950 // to threads in the team in chunks as the executing threads request them. 2951 // Each thread executes a chunk of iterations, then requests another chunk, 2952 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2953 // each chunk is proportional to the number of unassigned iterations divided 2954 // by the number of threads in the team, decreasing to 1. For a chunk_size 2955 // with value k (greater than 1), the size of each chunk is determined in the 2956 // same way, with the restriction that the chunks do not contain fewer than k 2957 // iterations (except for the last chunk to be assigned, which may have fewer 2958 // than k iterations). 2959 // 2960 // When schedule(auto) is specified, the decision regarding scheduling is 2961 // delegated to the compiler and/or runtime system. The programmer gives the 2962 // implementation the freedom to choose any possible mapping of iterations to 2963 // threads in the team. 2964 // 2965 // When schedule(runtime) is specified, the decision regarding scheduling is 2966 // deferred until run time, and the schedule and chunk size are taken from the 2967 // run-sched-var ICV. If the ICV is set to auto, the schedule is 2968 // implementation defined 2969 // 2970 // while(__kmpc_dispatch_next(&LB, &UB)) { 2971 // idx = LB; 2972 // while (idx <= UB) { BODY; ++idx; 2973 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 2974 // } // inner loop 2975 // } 2976 // 2977 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2978 // When schedule(static, chunk_size) is specified, iterations are divided into 2979 // chunks of size chunk_size, and the chunks are assigned to the threads in 2980 // the team in a round-robin fashion in the order of the thread number. 
2981   //
2982   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2983   //   while (idx <= UB) { BODY; ++idx; } // inner loop
2984   //   LB = LB + ST;
2985   //   UB = UB + ST;
2986   // }
2987   //
2988
2989   const Expr *IVExpr = S.getIterationVariable();
2990   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2991   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2992
2993   if (DynamicOrOrdered) {
2994     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2995         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2996     llvm::Value *LBVal = DispatchBounds.first;
2997     llvm::Value *UBVal = DispatchBounds.second;
2998     CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2999                                                               LoopArgs.Chunk};
3000     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
3001                            IVSigned, Ordered, DispatchRTInputValues);
3002   } else {
3003     CGOpenMPRuntime::StaticRTInput StaticInit(
3004         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3005         LoopArgs.ST, LoopArgs.Chunk);
3006     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3007                          ScheduleKind, StaticInit);
3008   }
3009
3010   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3011                                     const unsigned IVSize,
3012                                     const bool IVSigned) {
3013     if (Ordered) {
3014       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3015                                                             IVSigned);
3016     }
3017   };
3018
3019   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3020                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3021   OuterLoopArgs.IncExpr = S.getInc();
3022   OuterLoopArgs.Init = S.getInit();
3023   OuterLoopArgs.Cond = S.getCond();
3024   OuterLoopArgs.NextLB = S.getNextLowerBound();
3025   OuterLoopArgs.NextUB = S.getNextUpperBound();
3026   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3027                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3028 }
3029
3030 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3031                              const unsigned IVSize, const bool IVSigned) {}
3032
3033 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3034     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3035     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3036     const CodeGenLoopTy &CodeGenLoopContent) {
3037
3038   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3039
3040   // Emit outer loop.
3041   // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
3042   // dynamic.
3043   //
3044
3045   const Expr *IVExpr = S.getIterationVariable();
3046   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3047   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3048
3049   CGOpenMPRuntime::StaticRTInput StaticInit(
3050       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3051       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3052   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3053
3054   // For combined 'distribute' and 'for', the increment expression of
3055   // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
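  // Illustrative example (not taken from this file): given
  //   #pragma omp distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  // the outer 'distribute' level steps from chunk to chunk via DistInc,
  // while the inner worksharing loop advances with Inc inside each chunk.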
3056   Expr *IncExpr;
3057   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3058     IncExpr = S.getDistInc();
3059   else
3060     IncExpr = S.getInc();
3061
3062   // This routine is shared by 'omp distribute parallel for' and
3063   // 'omp distribute': select the right EUB expression depending on the
3064   // directive.
3065   OMPLoopArguments OuterLoopArgs;
3066   OuterLoopArgs.LB = LoopArgs.LB;
3067   OuterLoopArgs.UB = LoopArgs.UB;
3068   OuterLoopArgs.ST = LoopArgs.ST;
3069   OuterLoopArgs.IL = LoopArgs.IL;
3070   OuterLoopArgs.Chunk = LoopArgs.Chunk;
3071   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3072                           ? S.getCombinedEnsureUpperBound()
3073                           : S.getEnsureUpperBound();
3074   OuterLoopArgs.IncExpr = IncExpr;
3075   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3076                            ? S.getCombinedInit()
3077                            : S.getInit();
3078   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3079                            ? S.getCombinedCond()
3080                            : S.getCond();
3081   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3082                              ? S.getCombinedNextLowerBound()
3083                              : S.getNextLowerBound();
3084   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3085                              ? S.getCombinedNextUpperBound()
3086                              : S.getNextUpperBound();
3087
3088   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3089                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
3090                    emitEmptyOrdered);
3091 }
3092
3093 static std::pair<LValue, LValue>
3094 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3095                                      const OMPExecutableDirective &S) {
3096   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3097   LValue LB =
3098       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3099   LValue UB =
3100       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3101
3102   // When composing 'distribute' with 'for' (e.g. as in 'distribute
3103   // parallel for') we need to use the 'distribute'
3104   // chunk lower and upper bounds rather than the whole loop iteration
3105   // space. These are parameters to the outlined function for 'parallel',
3106   // and we copy the bounds of the previous schedule into
3107   // the current ones.
3108   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3109   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3110   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3111       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3112   PrevLBVal = CGF.EmitScalarConversion(
3113       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3114       LS.getIterationVariable()->getType(),
3115       LS.getPrevLowerBoundVariable()->getExprLoc());
3116   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3117       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3118   PrevUBVal = CGF.EmitScalarConversion(
3119       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3120       LS.getIterationVariable()->getType(),
3121       LS.getPrevUpperBoundVariable()->getExprLoc());
3122
3123   CGF.EmitStoreOfScalar(PrevLBVal, LB);
3124   CGF.EmitStoreOfScalar(PrevUBVal, UB);
3125
3126   return {LB, UB};
3127 }
3128
3129 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
3130 /// we need to use the LB and UB expressions generated by the worksharing
3131 /// code generation support, whereas in non-combined situations we would
3132 /// just emit 0 and the LastIteration expression.
3133 /// This function is necessary due to the difference between the LB and UB
3134 /// types used by the RT emission routines for 'for_static_init' and
3135 /// 'for_dispatch_init'.
3136 static std::pair<llvm::Value *, llvm::Value *>
3137 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3138                                         const OMPExecutableDirective &S,
3139                                         Address LB, Address UB) {
3140   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3141   const Expr *IVExpr = LS.getIterationVariable();
3142   // When implementing a dynamic schedule for a 'for' combined with a
3143   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3144   // is not normalized, as each team only executes its own assigned
3145   // distribute chunk.
3146   QualType IteratorTy = IVExpr->getType();
3147   llvm::Value *LBVal =
3148       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3149   llvm::Value *UBVal =
3150       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3151   return {LBVal, UBVal};
3152 }
3153
3154 static void emitDistributeParallelForDistributeInnerBoundParams(
3155     CodeGenFunction &CGF, const OMPExecutableDirective &S,
3156     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3157   const auto &Dir = cast<OMPLoopDirective>(S);
3158   LValue LB =
3159       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3160   llvm::Value *LBCast =
3161       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3162                                 CGF.SizeTy, /*isSigned=*/false);
3163   CapturedVars.push_back(LBCast);
3164   LValue UB =
3165       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3166
3167   llvm::Value *UBCast =
3168       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3169                                 CGF.SizeTy, /*isSigned=*/false);
3170   CapturedVars.push_back(UBCast);
3171 }
3172
3173 static void
3174 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3175                                  const OMPLoopDirective &S,
3176                                  CodeGenFunction::JumpDest LoopExit) {
3177   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3178                                          PrePostActionTy &Action) {
3179     Action.Enter(CGF);
3180     bool HasCancel = false;
3181     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3182       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3183         HasCancel = D->hasCancel();
3184       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3185         HasCancel = D->hasCancel();
3186       else if (const auto *D =
3187                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3188         HasCancel = D->hasCancel();
3189     }
3190     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3191                                                      HasCancel);
3192     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3193                                emitDistributeParallelForInnerBounds,
3194                                emitDistributeParallelForDispatchBounds);
3195   };
3196
3197   emitCommonOMPParallelDirective(
3198       CGF, S,
3199       isOpenMPSimdDirective(S.getDirectiveKind()) ?
OMPD_for_simd : OMPD_for, 3200 CGInlinedWorksharingLoop, 3201 emitDistributeParallelForDistributeInnerBoundParams); 3202 } 3203 3204 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 3205 const OMPDistributeParallelForDirective &S) { 3206 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3207 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 3208 S.getDistInc()); 3209 }; 3210 OMPLexicalScope Scope(*this, S, OMPD_parallel); 3211 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3212 } 3213 3214 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 3215 const OMPDistributeParallelForSimdDirective &S) { 3216 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3217 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 3218 S.getDistInc()); 3219 }; 3220 OMPLexicalScope Scope(*this, S, OMPD_parallel); 3221 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3222 } 3223 3224 void CodeGenFunction::EmitOMPDistributeSimdDirective( 3225 const OMPDistributeSimdDirective &S) { 3226 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3227 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3228 }; 3229 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3230 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 3231 } 3232 3233 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 3234 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { 3235 // Emit SPMD target parallel for region as a standalone region. 3236 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3237 emitOMPSimdRegion(CGF, S, Action); 3238 }; 3239 llvm::Function *Fn; 3240 llvm::Constant *Addr; 3241 // Emit target region as a standalone region. 3242 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3243 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3244 assert(Fn && Addr && "Target device function emission failed."); 3245 } 3246 3247 void CodeGenFunction::EmitOMPTargetSimdDirective( 3248 const OMPTargetSimdDirective &S) { 3249 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3250 emitOMPSimdRegion(CGF, S, Action); 3251 }; 3252 emitCommonOMPTargetDirective(*this, S, CodeGen); 3253 } 3254 3255 namespace { 3256 struct ScheduleKindModifiersTy { 3257 OpenMPScheduleClauseKind Kind; 3258 OpenMPScheduleClauseModifier M1; 3259 OpenMPScheduleClauseModifier M2; 3260 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 3261 OpenMPScheduleClauseModifier M1, 3262 OpenMPScheduleClauseModifier M2) 3263 : Kind(Kind), M1(M1), M2(M2) {} 3264 }; 3265 } // namespace 3266 3267 bool CodeGenFunction::EmitOMPWorksharingLoop( 3268 const OMPLoopDirective &S, Expr *EUB, 3269 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 3270 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 3271 // Emit the loop iteration variable. 3272 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3273 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3274 EmitVarDecl(*IVDecl); 3275 3276 // Emit the iterations count variable. 3277 // If it is not a variable, Sema decided to calculate iterations count on each 3278 // iteration (e.g., it is foldable into a constant). 3279 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3280 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3281 // Emit calculation of the iterations count. 
3282 EmitIgnoredExpr(S.getCalcLastIteration()); 3283 } 3284 3285 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3286 3287 bool HasLastprivateClause; 3288 // Check pre-condition. 3289 { 3290 OMPLoopScope PreInitScope(*this, S); 3291 // Skip the entire loop if we don't meet the precondition. 3292 // If the condition constant folds and can be elided, avoid emitting the 3293 // whole loop. 3294 bool CondConstant; 3295 llvm::BasicBlock *ContBlock = nullptr; 3296 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3297 if (!CondConstant) 3298 return false; 3299 } else { 3300 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 3301 ContBlock = createBasicBlock("omp.precond.end"); 3302 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3303 getProfileCount(&S)); 3304 EmitBlock(ThenBlock); 3305 incrementProfileCounter(&S); 3306 } 3307 3308 RunCleanupsScope DoacrossCleanupScope(*this); 3309 bool Ordered = false; 3310 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 3311 if (OrderedClause->getNumForLoops()) 3312 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 3313 else 3314 Ordered = true; 3315 } 3316 3317 llvm::DenseSet<const Expr *> EmittedFinals; 3318 emitAlignedClause(*this, S); 3319 bool HasLinears = EmitOMPLinearClauseInit(S); 3320 // Emit helper vars inits. 3321 3322 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 3323 LValue LB = Bounds.first; 3324 LValue UB = Bounds.second; 3325 LValue ST = 3326 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3327 LValue IL = 3328 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3329 3330 // Emit 'then' code. 3331 { 3332 OMPPrivateScope LoopScope(*this); 3333 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 3334 // Emit implicit barrier to synchronize threads and avoid data races on 3335 // initialization of firstprivate variables and post-update of 3336 // lastprivate variables. 3337 CGM.getOpenMPRuntime().emitBarrierCall( 3338 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3339 /*ForceSimpleCall=*/true); 3340 } 3341 EmitOMPPrivateClause(S, LoopScope); 3342 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 3343 *this, S, EmitLValue(S.getIterationVariable())); 3344 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3345 EmitOMPReductionClauseInit(S, LoopScope); 3346 EmitOMPPrivateLoopCounters(S, LoopScope); 3347 EmitOMPLinearClause(S, LoopScope); 3348 (void)LoopScope.Privatize(); 3349 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3350 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 3351 3352 // Detect the loop schedule kind and chunk. 3353 const Expr *ChunkExpr = nullptr; 3354 OpenMPScheduleTy ScheduleKind; 3355 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 3356 ScheduleKind.Schedule = C->getScheduleKind(); 3357 ScheduleKind.M1 = C->getFirstScheduleModifier(); 3358 ScheduleKind.M2 = C->getSecondScheduleModifier(); 3359 ChunkExpr = C->getChunkSize(); 3360 } else { 3361 // Default behaviour for schedule clause. 
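        // Illustrative example (not from this file): a bare
        //   #pragma omp for
        //   for (int i = 0; i < n; ++i) ...
        // carries no schedule clause, so the schedule kind and chunk are
        // obtained from this runtime codegen callback instead.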
3362 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 3363 *this, S, ScheduleKind.Schedule, ChunkExpr); 3364 } 3365 bool HasChunkSizeOne = false; 3366 llvm::Value *Chunk = nullptr; 3367 if (ChunkExpr) { 3368 Chunk = EmitScalarExpr(ChunkExpr); 3369 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 3370 S.getIterationVariable()->getType(), 3371 S.getBeginLoc()); 3372 Expr::EvalResult Result; 3373 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 3374 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 3375 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 3376 } 3377 } 3378 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3379 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3380 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 3381 // If the static schedule kind is specified or if the ordered clause is 3382 // specified, and if no monotonic modifier is specified, the effect will 3383 // be as if the monotonic modifier was specified. 3384 bool StaticChunkedOne = 3385 RT.isStaticChunked(ScheduleKind.Schedule, 3386 /* Chunked */ Chunk != nullptr) && 3387 HasChunkSizeOne && 3388 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3389 bool IsMonotonic = 3390 Ordered || 3391 (ScheduleKind.Schedule == OMPC_SCHEDULE_static && 3392 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || 3393 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || 3394 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 3395 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 3396 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 3397 /* Chunked */ Chunk != nullptr) || 3398 StaticChunkedOne) && 3399 !Ordered) { 3400 JumpDest LoopExit = 3401 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3402 emitCommonSimdLoop( 3403 *this, S, 3404 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3405 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3406 CGF.EmitOMPSimdInit(S); 3407 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 3408 if (C->getKind() == OMPC_ORDER_concurrent) 3409 CGF.LoopStack.setParallel(/*Enable=*/true); 3410 } 3411 }, 3412 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 3413 &S, ScheduleKind, LoopExit, 3414 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 3415 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3416 // When no chunk_size is specified, the iteration space is divided 3417 // into chunks that are approximately equal in size, and at most 3418 // one chunk is distributed to each thread. Note that the size of 3419 // the chunks is unspecified in this case. 3420 CGOpenMPRuntime::StaticRTInput StaticInit( 3421 IVSize, IVSigned, Ordered, IL.getAddress(CGF), 3422 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), 3423 StaticChunkedOne ? Chunk : nullptr); 3424 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 3425 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, 3426 StaticInit); 3427 // UB = min(UB, GlobalUB); 3428 if (!StaticChunkedOne) 3429 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 3430 // IV = LB; 3431 CGF.EmitIgnoredExpr(S.getInit()); 3432 // For unchunked static schedule generate: 3433 // 3434 // while (idx <= UB) { 3435 // BODY; 3436 // ++idx; 3437 // } 3438 // 3439 // For static schedule with chunk one: 3440 // 3441 // while (IV <= PrevUB) { 3442 // BODY; 3443 // IV += ST; 3444 // } 3445 CGF.EmitOMPInnerLoop( 3446 S, LoopScope.requiresCleanups(), 3447 StaticChunkedOne ? 
S.getCombinedParForInDistCond() 3448 : S.getCond(), 3449 StaticChunkedOne ? S.getDistInc() : S.getInc(), 3450 [&S, LoopExit](CodeGenFunction &CGF) { 3451 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); 3452 }, 3453 [](CodeGenFunction &) {}); 3454 }); 3455 EmitBlock(LoopExit.getBlock()); 3456 // Tell the runtime we are done. 3457 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 3458 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3459 S.getDirectiveKind()); 3460 }; 3461 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 3462 } else { 3463 // Emit the outer loop, which requests its work chunk [LB..UB] from 3464 // runtime and runs the inner loop to process it. 3465 const OMPLoopArguments LoopArguments( 3466 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 3467 IL.getAddress(*this), Chunk, EUB); 3468 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 3469 LoopArguments, CGDispatchBounds); 3470 } 3471 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3472 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 3473 return CGF.Builder.CreateIsNotNull( 3474 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3475 }); 3476 } 3477 EmitOMPReductionClauseFinal( 3478 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 3479 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 3480 : /*Parallel only*/ OMPD_parallel); 3481 // Emit post-update of the reduction variables if IsLastIter != 0. 3482 emitPostUpdateForReductionClause( 3483 *this, S, [IL, &S](CodeGenFunction &CGF) { 3484 return CGF.Builder.CreateIsNotNull( 3485 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3486 }); 3487 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3488 if (HasLastprivateClause) 3489 EmitOMPLastprivateClauseFinal( 3490 S, isOpenMPSimdDirective(S.getDirectiveKind()), 3491 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 3492 LoopScope.restoreMap(); 3493 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 3494 return CGF.Builder.CreateIsNotNull( 3495 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3496 }); 3497 } 3498 DoacrossCleanupScope.ForceCleanup(); 3499 // We're now done with the loop, so jump to the continuation block. 3500 if (ContBlock) { 3501 EmitBranch(ContBlock); 3502 EmitBlock(ContBlock, /*IsFinished=*/true); 3503 } 3504 } 3505 return HasLastprivateClause; 3506 } 3507 3508 /// The following two functions generate expressions for the loop lower 3509 /// and upper bounds in case of static and dynamic (dispatch) schedule 3510 /// of the associated 'for' or 'distribute' loop. 3511 static std::pair<LValue, LValue> 3512 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 3513 const auto &LS = cast<OMPLoopDirective>(S); 3514 LValue LB = 3515 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 3516 LValue UB = 3517 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 3518 return {LB, UB}; 3519 } 3520 3521 /// When dealing with dispatch schedules (e.g. 
dynamic, guided), we do not
3522 /// consider the lower and upper bound expressions generated by the
3523 /// worksharing loop support; instead we use 0 and the iteration space size
3524 /// as constants.
3525 static std::pair<llvm::Value *, llvm::Value *>
3526 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3527                           Address LB, Address UB) {
3528   const auto &LS = cast<OMPLoopDirective>(S);
3529   const Expr *IVExpr = LS.getIterationVariable();
3530   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3531   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3532   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3533   return {LBVal, UBVal};
3534 }
3535
3536 /// Emits internal temp array declarations for the directive with inscan
3537 /// reductions.
3538 /// The code is the following:
3539 /// \code
3540 /// size num_iters = <num_iters>;
3541 /// <type> buffer[num_iters];
3542 /// \endcode
3543 static void emitScanBasedDirectiveDecls(
3544     CodeGenFunction &CGF, const OMPLoopDirective &S,
3545     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3546   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3547       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3548   SmallVector<const Expr *, 4> Shareds;
3549   SmallVector<const Expr *, 4> Privates;
3550   SmallVector<const Expr *, 4> ReductionOps;
3551   SmallVector<const Expr *, 4> CopyArrayTemps;
3552   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3553     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3554            "Only inscan reductions are expected.");
3555     Shareds.append(C->varlist_begin(), C->varlist_end());
3556     Privates.append(C->privates().begin(), C->privates().end());
3557     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3558     CopyArrayTemps.append(C->copy_array_temps().begin(),
3559                           C->copy_array_temps().end());
3560   }
3561   {
3562     // Emit a buffer for each reduction variable.
3563     // ReductionCodeGen is required to correctly emit the code for array
3564     // reductions.
3565     ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3566     unsigned Count = 0;
3567     auto *ITA = CopyArrayTemps.begin();
3568     for (const Expr *IRef : Privates) {
3569       const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3570       // Emit variably modified arrays, used for arrays/array sections
3571       // reductions.
3572       if (PrivateVD->getType()->isVariablyModifiedType()) {
3573         RedCG.emitSharedOrigLValue(CGF, Count);
3574         RedCG.emitAggregateType(CGF, Count);
3575       }
3576       CodeGenFunction::OpaqueValueMapping DimMapping(
3577           CGF,
3578           cast<OpaqueValueExpr>(
3579               cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3580                   ->getSizeExpr()),
3581           RValue::get(OMPScanNumIterations));
3582       // Emit temp buffer.
3583       CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3584       ++ITA;
3585       ++Count;
3586     }
3587   }
3588 }
3589
3590 /// Copies the final inscan reduction values to the original variables.
3591 /// The code is the following:
3592 /// \code
3593 /// <orig_var> = buffer[num_iters-1];
3594 /// \endcode
3595 static void emitScanBasedDirectiveFinals(
3596     CodeGenFunction &CGF, const OMPLoopDirective &S,
3597     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3598   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3599       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3600   SmallVector<const Expr *, 4> Shareds;
3601   SmallVector<const Expr *, 4> LHSs;
3602   SmallVector<const Expr *, 4> RHSs;
3603   SmallVector<const Expr *, 4> Privates;
3604   SmallVector<const Expr *, 4> CopyOps;
3605   SmallVector<const Expr *, 4> CopyArrayElems;
3606   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3607     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3608            "Only inscan reductions are expected.");
3609     Shareds.append(C->varlist_begin(), C->varlist_end());
3610     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3611     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3612     Privates.append(C->privates().begin(), C->privates().end());
3613     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3614     CopyArrayElems.append(C->copy_array_elems().begin(),
3615                           C->copy_array_elems().end());
3616   }
3617   // Create a temp var and copy the LHS value to this temp value.
3618   // LHS = TMP[LastIter];
3619   llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3620       OMPScanNumIterations,
3621       llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3622   for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3623     const Expr *PrivateExpr = Privates[I];
3624     const Expr *OrigExpr = Shareds[I];
3625     const Expr *CopyArrayElem = CopyArrayElems[I];
3626     CodeGenFunction::OpaqueValueMapping IdxMapping(
3627         CGF,
3628         cast<OpaqueValueExpr>(
3629             cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3630         RValue::get(OMPLast));
3631     LValue DestLVal = CGF.EmitLValue(OrigExpr);
3632     LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3633     CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
3634                     SrcLVal.getAddress(CGF),
3635                     cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3636                     cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
3637                     CopyOps[I]);
3638   }
3639 }
3640
3641 /// Emits the code for the directive with inscan reductions.
3642 /// The code is the following:
3643 /// \code
3644 /// #pragma omp ...
3645 /// for (i: 0..<num_iters>) {
3646 ///   <input phase>;
3647 ///   buffer[i] = red;
3648 /// }
3649 /// #pragma omp master // in parallel region
3650 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3651 ///   for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3652 ///     buffer[cnt] op= buffer[cnt-pow(2,k)];
3653 /// #pragma omp barrier // in parallel region
3654 /// #pragma omp ...
3655 /// for (0..<num_iters>) {
3656 ///   red = InclusiveScan ?
buffer[i] : buffer[i-1]; 3657 /// <scan phase>; 3658 /// } 3659 /// \endcode 3660 static void emitScanBasedDirective( 3661 CodeGenFunction &CGF, const OMPLoopDirective &S, 3662 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, 3663 llvm::function_ref<void(CodeGenFunction &)> FirstGen, 3664 llvm::function_ref<void(CodeGenFunction &)> SecondGen) { 3665 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( 3666 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); 3667 SmallVector<const Expr *, 4> Privates; 3668 SmallVector<const Expr *, 4> ReductionOps; 3669 SmallVector<const Expr *, 4> LHSs; 3670 SmallVector<const Expr *, 4> RHSs; 3671 SmallVector<const Expr *, 4> CopyArrayElems; 3672 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 3673 assert(C->getModifier() == OMPC_REDUCTION_inscan && 3674 "Only inscan reductions are expected."); 3675 Privates.append(C->privates().begin(), C->privates().end()); 3676 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 3677 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 3678 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 3679 CopyArrayElems.append(C->copy_array_elems().begin(), 3680 C->copy_array_elems().end()); 3681 } 3682 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); 3683 { 3684 // Emit loop with input phase: 3685 // #pragma omp ... 3686 // for (i: 0..<num_iters>) { 3687 // <input phase>; 3688 // buffer[i] = red; 3689 // } 3690 CGF.OMPFirstScanLoop = true; 3691 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 3692 FirstGen(CGF); 3693 } 3694 // #pragma omp barrier // in parallel region 3695 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, 3696 &ReductionOps, 3697 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { 3698 Action.Enter(CGF); 3699 // Emit prefix reduction: 3700 // #pragma omp master // in parallel region 3701 // for (int k = 0; k <= ceil(log2(n)); ++k) 3702 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); 3703 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); 3704 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); 3705 llvm::Function *F = 3706 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); 3707 llvm::Value *Arg = 3708 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); 3709 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); 3710 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); 3711 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); 3712 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); 3713 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( 3714 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3715 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); 3716 CGF.EmitBlock(LoopBB); 3717 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); 3718 // size pow2k = 1; 3719 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); 3720 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); 3721 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); 3722 // for (size i = n - 1; i >= 2 ^ k; --i) 3723 // tmp[i] op= tmp[i-pow2k]; 3724 llvm::BasicBlock *InnerLoopBB = 3725 CGF.createBasicBlock("omp.inner.log.scan.body"); 3726 llvm::BasicBlock *InnerExitBB = 3727 CGF.createBasicBlock("omp.inner.log.scan.exit"); 3728 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); 3729 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3730 
CGF.EmitBlock(InnerLoopBB); 3731 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); 3732 IVal->addIncoming(NMin1, LoopBB); 3733 { 3734 CodeGenFunction::OMPPrivateScope PrivScope(CGF); 3735 auto *ILHS = LHSs.begin(); 3736 auto *IRHS = RHSs.begin(); 3737 for (const Expr *CopyArrayElem : CopyArrayElems) { 3738 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3739 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3740 Address LHSAddr = Address::invalid(); 3741 { 3742 CodeGenFunction::OpaqueValueMapping IdxMapping( 3743 CGF, 3744 cast<OpaqueValueExpr>( 3745 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3746 RValue::get(IVal)); 3747 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); 3748 } 3749 PrivScope.addPrivate(LHSVD, LHSAddr); 3750 Address RHSAddr = Address::invalid(); 3751 { 3752 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); 3753 CodeGenFunction::OpaqueValueMapping IdxMapping( 3754 CGF, 3755 cast<OpaqueValueExpr>( 3756 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3757 RValue::get(OffsetIVal)); 3758 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); 3759 } 3760 PrivScope.addPrivate(RHSVD, RHSAddr); 3761 ++ILHS; 3762 ++IRHS; 3763 } 3764 PrivScope.Privatize(); 3765 CGF.CGM.getOpenMPRuntime().emitReduction( 3766 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 3767 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); 3768 } 3769 llvm::Value *NextIVal = 3770 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3771 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); 3772 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); 3773 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3774 CGF.EmitBlock(InnerExitBB); 3775 llvm::Value *Next = 3776 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); 3777 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); 3778 // pow2k <<= 1; 3779 llvm::Value *NextPow2K = 3780 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); 3781 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); 3782 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); 3783 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); 3784 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); 3785 CGF.EmitBlock(ExitBB); 3786 }; 3787 if (isOpenMPParallelDirective(S.getDirectiveKind())) { 3788 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 3789 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3790 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3791 /*ForceSimpleCall=*/true); 3792 } else { 3793 RegionCodeGenTy RCG(CodeGen); 3794 RCG(CGF); 3795 } 3796 3797 CGF.OMPFirstScanLoop = false; 3798 SecondGen(CGF); 3799 } 3800 3801 static bool emitWorksharingDirective(CodeGenFunction &CGF, 3802 const OMPLoopDirective &S, 3803 bool HasCancel) { 3804 bool HasLastprivates; 3805 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 3806 [](const OMPReductionClause *C) { 3807 return C->getModifier() == OMPC_REDUCTION_inscan; 3808 })) { 3809 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 3810 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 3811 OMPLoopScope LoopScope(CGF, S); 3812 return CGF.EmitScalarExpr(S.getNumIterations()); 3813 }; 3814 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { 3815 CodeGenFunction::OMPCancelStackRAII CancelRegion( 3816 CGF, S.getDirectiveKind(), HasCancel); 3817 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3818 
emitForLoopBounds, 3819 emitDispatchForLoopBounds); 3820 // Emit an implicit barrier at the end. 3821 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), 3822 OMPD_for); 3823 }; 3824 const auto &&SecondGen = [&S, HasCancel, 3825 &HasLastprivates](CodeGenFunction &CGF) { 3826 CodeGenFunction::OMPCancelStackRAII CancelRegion( 3827 CGF, S.getDirectiveKind(), HasCancel); 3828 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3829 emitForLoopBounds, 3830 emitDispatchForLoopBounds); 3831 }; 3832 if (!isOpenMPParallelDirective(S.getDirectiveKind())) 3833 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); 3834 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); 3835 if (!isOpenMPParallelDirective(S.getDirectiveKind())) 3836 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); 3837 } else { 3838 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), 3839 HasCancel); 3840 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3841 emitForLoopBounds, 3842 emitDispatchForLoopBounds); 3843 } 3844 return HasLastprivates; 3845 } 3846 3847 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { 3848 if (S.hasCancel()) 3849 return false; 3850 for (OMPClause *C : S.clauses()) { 3851 if (isa<OMPNowaitClause>(C)) 3852 continue; 3853 3854 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { 3855 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3856 return false; 3857 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3858 return false; 3859 switch (SC->getScheduleKind()) { 3860 case OMPC_SCHEDULE_auto: 3861 case OMPC_SCHEDULE_dynamic: 3862 case OMPC_SCHEDULE_runtime: 3863 case OMPC_SCHEDULE_guided: 3864 case OMPC_SCHEDULE_static: 3865 continue; 3866 case OMPC_SCHEDULE_unknown: 3867 return false; 3868 } 3869 } 3870 3871 return false; 3872 } 3873 3874 return true; 3875 } 3876 3877 static llvm::omp::ScheduleKind 3878 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { 3879 switch (ScheduleClauseKind) { 3880 case OMPC_SCHEDULE_unknown: 3881 return llvm::omp::OMP_SCHEDULE_Default; 3882 case OMPC_SCHEDULE_auto: 3883 return llvm::omp::OMP_SCHEDULE_Auto; 3884 case OMPC_SCHEDULE_dynamic: 3885 return llvm::omp::OMP_SCHEDULE_Dynamic; 3886 case OMPC_SCHEDULE_guided: 3887 return llvm::omp::OMP_SCHEDULE_Guided; 3888 case OMPC_SCHEDULE_runtime: 3889 return llvm::omp::OMP_SCHEDULE_Runtime; 3890 case OMPC_SCHEDULE_static: 3891 return llvm::omp::OMP_SCHEDULE_Static; 3892 } 3893 llvm_unreachable("Unhandled schedule kind"); 3894 } 3895 3896 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 3897 bool HasLastprivates = false; 3898 bool UseOMPIRBuilder = 3899 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); 3900 auto &&CodeGen = [this, &S, &HasLastprivates, 3901 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { 3902 // Use the OpenMPIRBuilder if enabled. 3903 if (UseOMPIRBuilder) { 3904 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); 3905 3906 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; 3907 llvm::Value *ChunkSize = nullptr; 3908 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { 3909 SchedKind = 3910 convertClauseKindToSchedKind(SchedClause->getScheduleKind()); 3911 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) 3912 ChunkSize = EmitScalarExpr(ChunkSizeExpr); 3913 } 3914 3915 // Emit the associated statement and get its loop representation. 
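      // A minimal sketch of source reaching this OpenMPIRBuilder path (the
      // clause values are illustrative assumptions, not from this file):
      //   #pragma omp for schedule(dynamic, 4) nowait
      //   for (int i = 0; i < n; ++i)
      //     work(i);
      // Here SchedKind is OMP_SCHEDULE_Dynamic, ChunkSize is the emitted
      // value 4, and NeedsBarrier is false because of 'nowait'.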
3916 const Stmt *Inner = S.getRawStmt(); 3917 llvm::CanonicalLoopInfo *CLI = 3918 EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 3919 3920 llvm::OpenMPIRBuilder &OMPBuilder = 3921 CGM.getOpenMPRuntime().getOMPBuilder(); 3922 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 3923 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 3924 OMPBuilder.applyWorkshareLoop( 3925 Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, 3926 SchedKind, ChunkSize, /*HasSimdModifier=*/false, 3927 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, 3928 /*HasOrderedClause=*/false); 3929 return; 3930 } 3931 3932 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); 3933 }; 3934 { 3935 auto LPCRegion = 3936 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3937 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3938 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 3939 S.hasCancel()); 3940 } 3941 3942 if (!UseOMPIRBuilder) { 3943 // Emit an implicit barrier at the end. 3944 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3945 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3946 } 3947 // Check for outer lastprivate conditional update. 3948 checkForLastprivateConditionalUpdate(*this, S); 3949 } 3950 3951 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 3952 bool HasLastprivates = false; 3953 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3954 PrePostActionTy &) { 3955 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); 3956 }; 3957 { 3958 auto LPCRegion = 3959 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3960 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3961 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 3962 } 3963 3964 // Emit an implicit barrier at the end. 3965 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3966 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3967 // Check for outer lastprivate conditional update. 3968 checkForLastprivateConditionalUpdate(*this, S); 3969 } 3970 3971 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 3972 const Twine &Name, 3973 llvm::Value *Init = nullptr) { 3974 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 3975 if (Init) 3976 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 3977 return LVal; 3978 } 3979 3980 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 3981 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 3982 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 3983 bool HasLastprivates = false; 3984 auto &&CodeGen = [&S, CapturedStmt, CS, 3985 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 3986 const ASTContext &C = CGF.getContext(); 3987 QualType KmpInt32Ty = 3988 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3989 // Emit helper vars inits. 3990 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 3991 CGF.Builder.getInt32(0)); 3992 llvm::ConstantInt *GlobalUBVal = CS != nullptr 3993 ? 
CGF.Builder.getInt32(CS->size() - 1) 3994 : CGF.Builder.getInt32(0); 3995 LValue UB = 3996 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 3997 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 3998 CGF.Builder.getInt32(1)); 3999 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 4000 CGF.Builder.getInt32(0)); 4001 // Loop counter. 4002 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 4003 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 4004 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 4005 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 4006 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 4007 // Generate condition for loop. 4008 BinaryOperator *Cond = BinaryOperator::Create( 4009 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, 4010 S.getBeginLoc(), FPOptionsOverride()); 4011 // Increment for loop counter. 4012 UnaryOperator *Inc = UnaryOperator::Create( 4013 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, 4014 S.getBeginLoc(), true, FPOptionsOverride()); 4015 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 4016 // Iterate through all sections and emit a switch construct: 4017 // switch (IV) { 4018 // case 0: 4019 // <SectionStmt[0]>; 4020 // break; 4021 // ... 4022 // case <NumSection> - 1: 4023 // <SectionStmt[<NumSection> - 1]>; 4024 // break; 4025 // } 4026 // .omp.sections.exit: 4027 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 4028 llvm::SwitchInst *SwitchStmt = 4029 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 4030 ExitBB, CS == nullptr ? 1 : CS->size()); 4031 if (CS) { 4032 unsigned CaseNumber = 0; 4033 for (const Stmt *SubStmt : CS->children()) { 4034 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 4035 CGF.EmitBlock(CaseBB); 4036 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 4037 CGF.EmitStmt(SubStmt); 4038 CGF.EmitBranch(ExitBB); 4039 ++CaseNumber; 4040 } 4041 } else { 4042 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 4043 CGF.EmitBlock(CaseBB); 4044 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 4045 CGF.EmitStmt(CapturedStmt); 4046 CGF.EmitBranch(ExitBB); 4047 } 4048 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 4049 }; 4050 4051 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 4052 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 4053 // Emit implicit barrier to synchronize threads and avoid data races on 4054 // initialization of firstprivate variables and post-update of lastprivate 4055 // variables. 4056 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 4057 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 4058 /*ForceSimpleCall=*/true); 4059 } 4060 CGF.EmitOMPPrivateClause(S, LoopScope); 4061 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 4062 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4063 CGF.EmitOMPReductionClauseInit(S, LoopScope); 4064 (void)LoopScope.Privatize(); 4065 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4066 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4067 4068 // Emit static non-chunked loop. 
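    // Illustrative example (not from this file): for
    //   #pragma omp sections
    //   {
    //   #pragma omp section
    //     foo();
    //   #pragma omp section
    //     bar();
    //   }
    // the helper vars above give IV the range [0, 1], and BodyGen emits a
    // switch over IV that runs foo() for case 0 and bar() for case 1.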
4069 OpenMPScheduleTy ScheduleKind; 4070 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 4071 CGOpenMPRuntime::StaticRTInput StaticInit( 4072 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), 4073 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); 4074 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 4075 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); 4076 // UB = min(UB, GlobalUB); 4077 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); 4078 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( 4079 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 4080 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 4081 // IV = LB; 4082 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); 4083 // while (idx <= UB) { BODY; ++idx; } 4084 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, 4085 [](CodeGenFunction &) {}); 4086 // Tell the runtime we are done. 4087 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 4088 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 4089 S.getDirectiveKind()); 4090 }; 4091 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 4092 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4093 // Emit post-update of the reduction variables if IsLastIter != 0. 4094 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { 4095 return CGF.Builder.CreateIsNotNull( 4096 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 4097 }); 4098 4099 // Emit final copy of the lastprivate variables if IsLastIter != 0. 4100 if (HasLastprivates) 4101 CGF.EmitOMPLastprivateClauseFinal( 4102 S, /*NoFinals=*/false, 4103 CGF.Builder.CreateIsNotNull( 4104 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 4105 }; 4106 4107 bool HasCancel = false; 4108 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 4109 HasCancel = OSD->hasCancel(); 4110 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 4111 HasCancel = OPSD->hasCancel(); 4112 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 4113 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 4114 HasCancel); 4115 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 4116 // clause. Otherwise the barrier will be generated by the codegen for the 4117 // directive. 4118 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 4119 // Emit implicit barrier to synchronize threads and avoid data races on 4120 // initialization of firstprivate variables. 
4121 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4122 OMPD_unknown); 4123 } 4124 } 4125 4126 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 4127 if (CGM.getLangOpts().OpenMPIRBuilder) { 4128 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4129 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4130 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 4131 4132 auto FiniCB = [this](InsertPointTy IP) { 4133 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4134 }; 4135 4136 const CapturedStmt *ICS = S.getInnermostCapturedStmt(); 4137 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 4138 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 4139 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 4140 if (CS) { 4141 for (const Stmt *SubStmt : CS->children()) { 4142 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, 4143 InsertPointTy CodeGenIP) { 4144 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4145 *this, SubStmt, AllocaIP, CodeGenIP, "section"); 4146 }; 4147 SectionCBVector.push_back(SectionCB); 4148 } 4149 } else { 4150 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, 4151 InsertPointTy CodeGenIP) { 4152 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4153 *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); 4154 }; 4155 SectionCBVector.push_back(SectionCB); 4156 } 4157 4158 // Privatization callback that performs appropriate action for 4159 // shared/private/firstprivate/lastprivate/copyin/... variables. 4160 // 4161 // TODO: This defaults to shared right now. 4162 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 4163 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 4164 // The next line is appropriate only for variables (Val) with the 4165 // data-sharing attribute "shared". 4166 ReplVal = &Val; 4167 4168 return CodeGenIP; 4169 }; 4170 4171 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); 4172 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 4173 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 4174 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 4175 Builder.restoreIP(OMPBuilder.createSections( 4176 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), 4177 S.getSingleClause<OMPNowaitClause>())); 4178 return; 4179 } 4180 { 4181 auto LPCRegion = 4182 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4183 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4184 EmitSections(S); 4185 } 4186 // Emit an implicit barrier at the end. 4187 if (!S.getSingleClause<OMPNowaitClause>()) { 4188 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4189 OMPD_sections); 4190 } 4191 // Check for outer lastprivate conditional update. 
4192 checkForLastprivateConditionalUpdate(*this, S); 4193 } 4194 4195 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 4196 if (CGM.getLangOpts().OpenMPIRBuilder) { 4197 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4198 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4199 4200 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); 4201 auto FiniCB = [this](InsertPointTy IP) { 4202 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4203 }; 4204 4205 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, 4206 InsertPointTy CodeGenIP) { 4207 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4208 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); 4209 }; 4210 4211 LexicalScope Scope(*this, S.getSourceRange()); 4212 EmitStopPoint(&S); 4213 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); 4214 4215 return; 4216 } 4217 LexicalScope Scope(*this, S.getSourceRange()); 4218 EmitStopPoint(&S); 4219 EmitStmt(S.getAssociatedStmt()); 4220 } 4221 4222 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 4223 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 4224 llvm::SmallVector<const Expr *, 8> DestExprs; 4225 llvm::SmallVector<const Expr *, 8> SrcExprs; 4226 llvm::SmallVector<const Expr *, 8> AssignmentOps; 4227 // Check if there are any 'copyprivate' clauses associated with this 4228 // 'single' construct. 4229 // Build a list of copyprivate variables along with helper expressions 4230 // (<source>, <destination>, <destination>=<source> expressions) 4231 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 4232 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 4233 DestExprs.append(C->destination_exprs().begin(), 4234 C->destination_exprs().end()); 4235 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 4236 AssignmentOps.append(C->assignment_ops().begin(), 4237 C->assignment_ops().end()); 4238 } 4239 // Emit code for 'single' region along with 'copyprivate' clauses 4240 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4241 Action.Enter(CGF); 4242 OMPPrivateScope SingleScope(CGF); 4243 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 4244 CGF.EmitOMPPrivateClause(S, SingleScope); 4245 (void)SingleScope.Privatize(); 4246 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4247 }; 4248 { 4249 auto LPCRegion = 4250 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4251 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4252 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 4253 CopyprivateVars, DestExprs, 4254 SrcExprs, AssignmentOps); 4255 } 4256 // Emit an implicit barrier at the end (to avoid data race on firstprivate 4257 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 4258 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 4259 CGM.getOpenMPRuntime().emitBarrierCall( 4260 *this, S.getBeginLoc(), 4261 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 4262 } 4263 // Check for outer lastprivate conditional update. 
4264 checkForLastprivateConditionalUpdate(*this, S); 4265 } 4266 4267 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4268 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4269 Action.Enter(CGF); 4270 CGF.EmitStmt(S.getRawStmt()); 4271 }; 4272 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 4273 } 4274 4275 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 4276 if (CGM.getLangOpts().OpenMPIRBuilder) { 4277 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4278 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4279 4280 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); 4281 4282 auto FiniCB = [this](InsertPointTy IP) { 4283 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4284 }; 4285 4286 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, 4287 InsertPointTy CodeGenIP) { 4288 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4289 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); 4290 }; 4291 4292 LexicalScope Scope(*this, S.getSourceRange()); 4293 EmitStopPoint(&S); 4294 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 4295 4296 return; 4297 } 4298 LexicalScope Scope(*this, S.getSourceRange()); 4299 EmitStopPoint(&S); 4300 emitMaster(*this, S); 4301 } 4302 4303 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4304 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4305 Action.Enter(CGF); 4306 CGF.EmitStmt(S.getRawStmt()); 4307 }; 4308 Expr *Filter = nullptr; 4309 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4310 Filter = FilterClause->getThreadID(); 4311 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), 4312 Filter); 4313 } 4314 4315 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { 4316 if (CGM.getLangOpts().OpenMPIRBuilder) { 4317 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4318 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4319 4320 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); 4321 const Expr *Filter = nullptr; 4322 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4323 Filter = FilterClause->getThreadID(); 4324 llvm::Value *FilterVal = Filter 4325 ? 
EmitScalarExpr(Filter, CGM.Int32Ty) 4326 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 4327 4328 auto FiniCB = [this](InsertPointTy IP) { 4329 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4330 }; 4331 4332 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, 4333 InsertPointTy CodeGenIP) { 4334 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4335 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); 4336 }; 4337 4338 LexicalScope Scope(*this, S.getSourceRange()); 4339 EmitStopPoint(&S); 4340 Builder.restoreIP( 4341 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); 4342 4343 return; 4344 } 4345 LexicalScope Scope(*this, S.getSourceRange()); 4346 EmitStopPoint(&S); 4347 emitMasked(*this, S); 4348 } 4349 4350 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 4351 if (CGM.getLangOpts().OpenMPIRBuilder) { 4352 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4353 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4354 4355 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); 4356 const Expr *Hint = nullptr; 4357 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4358 Hint = HintClause->getHint(); 4359 4360 // TODO: This is slightly different from what's currently being done in 4361 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything 4362 // about typing is final. 4363 llvm::Value *HintInst = nullptr; 4364 if (Hint) 4365 HintInst = 4366 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); 4367 4368 auto FiniCB = [this](InsertPointTy IP) { 4369 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4370 }; 4371 4372 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, 4373 InsertPointTy CodeGenIP) { 4374 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4375 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); 4376 }; 4377 4378 LexicalScope Scope(*this, S.getSourceRange()); 4379 EmitStopPoint(&S); 4380 Builder.restoreIP(OMPBuilder.createCritical( 4381 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), 4382 HintInst)); 4383 4384 return; 4385 } 4386 4387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4388 Action.Enter(CGF); 4389 CGF.EmitStmt(S.getAssociatedStmt()); 4390 }; 4391 const Expr *Hint = nullptr; 4392 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4393 Hint = HintClause->getHint(); 4394 LexicalScope Scope(*this, S.getSourceRange()); 4395 EmitStopPoint(&S); 4396 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 4397 S.getDirectiveName().getAsString(), 4398 CodeGen, S.getBeginLoc(), Hint); 4399 } 4400 4401 void CodeGenFunction::EmitOMPParallelForDirective( 4402 const OMPParallelForDirective &S) { 4403 // Emit directive as a combined directive that consists of two implicit 4404 // directives: 'parallel' with 'for' directive. 
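  // Illustrative sketch (not code emitted by this file; names are made up):
  // a combined directive such as
  //
  //   #pragma omp parallel for
  //   for (int I = 0; I < N; ++I)
  //     Body(I);
  //
  // is handled as if it were the nested pair
  //
  //   #pragma omp parallel
  //   #pragma omp for
  //   for (int I = 0; I < N; ++I)
  //     Body(I);
  //
  // i.e. the CodeGen lambda below emits the 'for' worksharing loop inside
  // the outlined 'parallel' region. The same pattern applies to the other
  // combined 'parallel X' emitters that follow.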
4405 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4406 Action.Enter(CGF); 4407 emitOMPCopyinClause(CGF, S); 4408 (void)emitWorksharingDirective(CGF, S, S.hasCancel()); 4409 }; 4410 { 4411 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 4412 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 4413 CGCapturedStmtInfo CGSI(CR_OpenMP); 4414 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); 4415 OMPLoopScope LoopScope(CGF, S); 4416 return CGF.EmitScalarExpr(S.getNumIterations()); 4417 }; 4418 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 4419 [](const OMPReductionClause *C) { 4420 return C->getModifier() == OMPC_REDUCTION_inscan; 4421 }); 4422 if (IsInscan) 4423 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); 4424 auto LPCRegion = 4425 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4426 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 4427 emitEmptyBoundParameters); 4428 if (IsInscan) 4429 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); 4430 } 4431 // Check for outer lastprivate conditional update. 4432 checkForLastprivateConditionalUpdate(*this, S); 4433 } 4434 4435 void CodeGenFunction::EmitOMPParallelForSimdDirective( 4436 const OMPParallelForSimdDirective &S) { 4437 // Emit directive as a combined directive that consists of two implicit 4438 // directives: 'parallel' with 'for' directive. 4439 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4440 Action.Enter(CGF); 4441 emitOMPCopyinClause(CGF, S); 4442 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); 4443 }; 4444 { 4445 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 4446 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 4447 CGCapturedStmtInfo CGSI(CR_OpenMP); 4448 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); 4449 OMPLoopScope LoopScope(CGF, S); 4450 return CGF.EmitScalarExpr(S.getNumIterations()); 4451 }; 4452 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 4453 [](const OMPReductionClause *C) { 4454 return C->getModifier() == OMPC_REDUCTION_inscan; 4455 }); 4456 if (IsInscan) 4457 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); 4458 auto LPCRegion = 4459 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4460 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, 4461 emitEmptyBoundParameters); 4462 if (IsInscan) 4463 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); 4464 } 4465 // Check for outer lastprivate conditional update. 4466 checkForLastprivateConditionalUpdate(*this, S); 4467 } 4468 4469 void CodeGenFunction::EmitOMPParallelMasterDirective( 4470 const OMPParallelMasterDirective &S) { 4471 // Emit directive as a combined directive that consists of two implicit 4472 // directives: 'parallel' with 'master' directive. 
4473 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4474 Action.Enter(CGF); 4475 OMPPrivateScope PrivateScope(CGF); 4476 emitOMPCopyinClause(CGF, S); 4477 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4478 CGF.EmitOMPPrivateClause(S, PrivateScope); 4479 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4480 (void)PrivateScope.Privatize(); 4481 emitMaster(CGF, S); 4482 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4483 }; 4484 { 4485 auto LPCRegion = 4486 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4487 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, 4488 emitEmptyBoundParameters); 4489 emitPostUpdateForReductionClause(*this, S, 4490 [](CodeGenFunction &) { return nullptr; }); 4491 } 4492 // Check for outer lastprivate conditional update. 4493 checkForLastprivateConditionalUpdate(*this, S); 4494 } 4495 4496 void CodeGenFunction::EmitOMPParallelMaskedDirective( 4497 const OMPParallelMaskedDirective &S) { 4498 // Emit directive as a combined directive that consists of two implicit 4499 // directives: 'parallel' with 'masked' directive. 4500 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4501 Action.Enter(CGF); 4502 OMPPrivateScope PrivateScope(CGF); 4503 emitOMPCopyinClause(CGF, S); 4504 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4505 CGF.EmitOMPPrivateClause(S, PrivateScope); 4506 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4507 (void)PrivateScope.Privatize(); 4508 emitMasked(CGF, S); 4509 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4510 }; 4511 { 4512 auto LPCRegion = 4513 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4514 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, 4515 emitEmptyBoundParameters); 4516 emitPostUpdateForReductionClause(*this, S, 4517 [](CodeGenFunction &) { return nullptr; }); 4518 } 4519 // Check for outer lastprivate conditional update. 4520 checkForLastprivateConditionalUpdate(*this, S); 4521 } 4522 4523 void CodeGenFunction::EmitOMPParallelSectionsDirective( 4524 const OMPParallelSectionsDirective &S) { 4525 // Emit directive as a combined directive that consists of two implicit 4526 // directives: 'parallel' with 'sections' directive. 4527 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4528 Action.Enter(CGF); 4529 emitOMPCopyinClause(CGF, S); 4530 CGF.EmitSections(S); 4531 }; 4532 { 4533 auto LPCRegion = 4534 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4535 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 4536 emitEmptyBoundParameters); 4537 } 4538 // Check for outer lastprivate conditional update. 4539 checkForLastprivateConditionalUpdate(*this, S); 4540 } 4541 4542 namespace { 4543 /// Get the list of variables declared in the context of the untied tasks. 4544 class CheckVarsEscapingUntiedTaskDeclContext final 4545 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { 4546 llvm::SmallVector<const VarDecl *, 4> PrivateDecls; 4547 4548 public: 4549 explicit CheckVarsEscapingUntiedTaskDeclContext() = default; 4550 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; 4551 void VisitDeclStmt(const DeclStmt *S) { 4552 if (!S) 4553 return; 4554 // Need to privatize only local vars, static locals can be processed as is. 
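    // For example (illustrative only): given
    //   #pragma omp task untied
    //   { int A = 0; static int B; use(A, B); }
    // 'A' has local storage and must be privatized so it survives task
    // switching points, while 'B' has static storage duration and can be
    // referenced as is.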
4555     for (const Decl *D : S->decls()) {
4556       if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4557         if (VD->hasLocalStorage())
4558           PrivateDecls.push_back(VD);
4559     }
4560   }
4561   void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4562   void VisitCapturedStmt(const CapturedStmt *) {}
4563   void VisitLambdaExpr(const LambdaExpr *) {}
4564   void VisitBlockExpr(const BlockExpr *) {}
4565   void VisitStmt(const Stmt *S) {
4566     if (!S)
4567       return;
4568     for (const Stmt *Child : S->children())
4569       if (Child)
4570         Visit(Child);
4571   }
4572 
4573   /// Returns the list of privatized variables found so far.
4574   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4575 };
4576 } // anonymous namespace
4577 
4578 static void buildDependences(const OMPExecutableDirective &S,
4579                              OMPTaskDataTy &Data) {
4580 
4581   // First look for 'omp_all_memory' and add this first.
4582   bool OmpAllMemory = false;
4583   if (llvm::any_of(
4584           S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4585             return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4586                    C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4587           })) {
4588     OmpAllMemory = true;
4589     // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4590     // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4591     // simplify.
4592     OMPTaskDataTy::DependData &DD =
4593         Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4594                                       /*IteratorExpr=*/nullptr);
4595     // Add a nullptr Expr to simplify the codegen in emitDependData.
4596     DD.DepExprs.push_back(nullptr);
4597   }
4598   // Add remaining dependences skipping any 'out' or 'inout' if they are
4599   // overridden by 'omp_all_memory'.
4600   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4601     OpenMPDependClauseKind Kind = C->getDependencyKind();
4602     if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4603       continue;
4604     if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4605       continue;
4606     OMPTaskDataTy::DependData &DD =
4607         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4608     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4609   }
4610 }
4611 
4612 void CodeGenFunction::EmitOMPTaskBasedDirective(
4613     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4614     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4615     OMPTaskDataTy &Data) {
4616   // Emit outlined function for task construct.
4617   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4618   auto I = CS->getCapturedDecl()->param_begin();
4619   auto PartId = std::next(I);
4620   auto TaskT = std::next(I, 4);
4621   // Check if the task is 'final'.
4622   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4623     // If the condition constant folds and can be elided, try to avoid emitting
4624     // the condition and the dead arm of the if/else.
4625     const Expr *Cond = Clause->getCondition();
4626     bool CondConstant;
4627     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4628       Data.Final.setInt(CondConstant);
4629     else
4630       Data.Final.setPointer(EvaluateExprAsBool(Cond));
4631   } else {
4632     // By default the task is not final.
4633     Data.Final.setInt(/*IntVal=*/false);
4634   }
4635   // Check if the task has a 'priority' clause.
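  // E.g. (illustrative) for '#pragma omp task priority(P)' the expression P
  // is evaluated and converted to a signed 32-bit integer for the runtime.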
4636 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 4637 const Expr *Prio = Clause->getPriority(); 4638 Data.Priority.setInt(/*IntVal=*/true); 4639 Data.Priority.setPointer(EmitScalarConversion( 4640 EmitScalarExpr(Prio), Prio->getType(), 4641 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 4642 Prio->getExprLoc())); 4643 } 4644 // The first function argument for tasks is a thread id, the second one is a 4645 // part id (0 for tied tasks, >=0 for untied task). 4646 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 4647 // Get list of private variables. 4648 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 4649 auto IRef = C->varlist_begin(); 4650 for (const Expr *IInit : C->private_copies()) { 4651 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4652 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4653 Data.PrivateVars.push_back(*IRef); 4654 Data.PrivateCopies.push_back(IInit); 4655 } 4656 ++IRef; 4657 } 4658 } 4659 EmittedAsPrivate.clear(); 4660 // Get list of firstprivate variables. 4661 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 4662 auto IRef = C->varlist_begin(); 4663 auto IElemInitRef = C->inits().begin(); 4664 for (const Expr *IInit : C->private_copies()) { 4665 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4666 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4667 Data.FirstprivateVars.push_back(*IRef); 4668 Data.FirstprivateCopies.push_back(IInit); 4669 Data.FirstprivateInits.push_back(*IElemInitRef); 4670 } 4671 ++IRef; 4672 ++IElemInitRef; 4673 } 4674 } 4675 // Get list of lastprivate variables (for taskloops). 4676 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 4677 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 4678 auto IRef = C->varlist_begin(); 4679 auto ID = C->destination_exprs().begin(); 4680 for (const Expr *IInit : C->private_copies()) { 4681 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4682 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4683 Data.LastprivateVars.push_back(*IRef); 4684 Data.LastprivateCopies.push_back(IInit); 4685 } 4686 LastprivateDstsOrigs.insert( 4687 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 4688 cast<DeclRefExpr>(*IRef))); 4689 ++IRef; 4690 ++ID; 4691 } 4692 } 4693 SmallVector<const Expr *, 4> LHSs; 4694 SmallVector<const Expr *, 4> RHSs; 4695 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 4696 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 4697 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 4698 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 4699 Data.ReductionOps.append(C->reduction_ops().begin(), 4700 C->reduction_ops().end()); 4701 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 4702 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 4703 } 4704 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 4705 *this, S.getBeginLoc(), LHSs, RHSs, Data); 4706 // Build list of dependences. 4707 buildDependences(S, Data); 4708 // Get list of local vars for untied tasks. 
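  // An untied task may resume on a different thread after a task scheduling
  // point, so locals declared in its body cannot live on the stack frame of
  // the thread that started the task; they are collected here and later
  // allocated in the task's private storage instead.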
4709 if (!Data.Tied) { 4710 CheckVarsEscapingUntiedTaskDeclContext Checker; 4711 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); 4712 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), 4713 Checker.getPrivateDecls().end()); 4714 } 4715 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 4716 CapturedRegion](CodeGenFunction &CGF, 4717 PrePostActionTy &Action) { 4718 llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 4719 std::pair<Address, Address>> 4720 UntiedLocalVars; 4721 // Set proper addresses for generated private copies. 4722 OMPPrivateScope Scope(CGF); 4723 // Generate debug info for variables present in shared clause. 4724 if (auto *DI = CGF.getDebugInfo()) { 4725 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = 4726 CGF.CapturedStmtInfo->getCaptureFields(); 4727 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); 4728 if (CaptureFields.size() && ContextValue) { 4729 unsigned CharWidth = CGF.getContext().getCharWidth(); 4730 // The shared variables are packed together as members of structure. 4731 // So the address of each shared variable can be computed by adding 4732 // offset of it (within record) to the base address of record. For each 4733 // shared variable, debug intrinsic llvm.dbg.declare is generated with 4734 // appropriate expressions (DIExpression). 4735 // Ex: 4736 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i 4737 // call void @llvm.dbg.declare(metadata %struct.anon* %12, 4738 // metadata !svar1, 4739 // metadata !DIExpression(DW_OP_deref)) 4740 // call void @llvm.dbg.declare(metadata %struct.anon* %12, 4741 // metadata !svar2, 4742 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) 4743 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { 4744 const VarDecl *SharedVar = It->first; 4745 RecordDecl *CaptureRecord = It->second->getParent(); 4746 const ASTRecordLayout &Layout = 4747 CGF.getContext().getASTRecordLayout(CaptureRecord); 4748 unsigned Offset = 4749 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; 4750 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) 4751 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, 4752 CGF.Builder, false); 4753 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); 4754 // Get the call dbg.declare instruction we just created and update 4755 // its DIExpression to add offset to base address. 4756 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) { 4757 SmallVector<uint64_t, 8> Ops; 4758 // Add offset to the base address if non zero. 4759 if (Offset) { 4760 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); 4761 Ops.push_back(Offset); 4762 } 4763 Ops.push_back(llvm::dwarf::DW_OP_deref); 4764 auto &Ctx = DDI->getContext(); 4765 llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops); 4766 Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr)); 4767 } 4768 } 4769 } 4770 } 4771 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; 4772 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 4773 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { 4774 enum { PrivatesParam = 2, CopyFnParam = 3 }; 4775 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 4776 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 4777 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 4778 CS->getCapturedDecl()->getParam(PrivatesParam))); 4779 // Map privates. 
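      // The call emitted below follows the convention of the generated task
      // 'copy' helper: the first argument is the pointer to the privates
      // block, and each following argument is an out-pointer that receives
      // the address of one private/firstprivate/lastprivate/local copy
      // inside that block.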
4780 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 4781 llvm::SmallVector<llvm::Value *, 16> CallArgs; 4782 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 4783 CallArgs.push_back(PrivatesPtr); 4784 ParamTypes.push_back(PrivatesPtr->getType()); 4785 for (const Expr *E : Data.PrivateVars) { 4786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4787 Address PrivatePtr = CGF.CreateMemTemp( 4788 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 4789 PrivatePtrs.emplace_back(VD, PrivatePtr); 4790 CallArgs.push_back(PrivatePtr.getPointer()); 4791 ParamTypes.push_back(PrivatePtr.getType()); 4792 } 4793 for (const Expr *E : Data.FirstprivateVars) { 4794 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4795 Address PrivatePtr = 4796 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4797 ".firstpriv.ptr.addr"); 4798 PrivatePtrs.emplace_back(VD, PrivatePtr); 4799 FirstprivatePtrs.emplace_back(VD, PrivatePtr); 4800 CallArgs.push_back(PrivatePtr.getPointer()); 4801 ParamTypes.push_back(PrivatePtr.getType()); 4802 } 4803 for (const Expr *E : Data.LastprivateVars) { 4804 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4805 Address PrivatePtr = 4806 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4807 ".lastpriv.ptr.addr"); 4808 PrivatePtrs.emplace_back(VD, PrivatePtr); 4809 CallArgs.push_back(PrivatePtr.getPointer()); 4810 ParamTypes.push_back(PrivatePtr.getType()); 4811 } 4812 for (const VarDecl *VD : Data.PrivateLocals) { 4813 QualType Ty = VD->getType().getNonReferenceType(); 4814 if (VD->getType()->isLValueReferenceType()) 4815 Ty = CGF.getContext().getPointerType(Ty); 4816 if (isAllocatableDecl(VD)) 4817 Ty = CGF.getContext().getPointerType(Ty); 4818 Address PrivatePtr = CGF.CreateMemTemp( 4819 CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); 4820 auto Result = UntiedLocalVars.insert( 4821 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); 4822 // If key exists update in place. 
4823         if (!Result.second)
4824           *Result.first = std::make_pair(
4825               VD, std::make_pair(PrivatePtr, Address::invalid()));
4826         CallArgs.push_back(PrivatePtr.getPointer());
4827         ParamTypes.push_back(PrivatePtr.getType());
4828       }
4829       auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4830                                                ParamTypes, /*isVarArg=*/false);
4831       CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832           CopyFn, CopyFnTy->getPointerTo());
4833       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4834           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4835       for (const auto &Pair : LastprivateDstsOrigs) {
4836         const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4837         DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4838                         /*RefersToEnclosingVariableOrCapture=*/
4839                         CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4840                         Pair.second->getType(), VK_LValue,
4841                         Pair.second->getExprLoc());
4842         Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
4843       }
4844       for (const auto &Pair : PrivatePtrs) {
4845         Address Replacement = Address(
4846             CGF.Builder.CreateLoad(Pair.second),
4847             CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4848             CGF.getContext().getDeclAlign(Pair.first));
4849         Scope.addPrivate(Pair.first, Replacement);
4850         if (auto *DI = CGF.getDebugInfo())
4851           if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4852             (void)DI->EmitDeclareOfAutoVariable(
4853                 Pair.first, Pair.second.getPointer(), CGF.Builder,
4854                 /*UsePointerValue*/ true);
4855       }
4856       // Adjust mapping for internal locals by mapping actual memory instead of
4857       // a pointer to this memory.
4858       for (auto &Pair : UntiedLocalVars) {
4859         QualType VDType = Pair.first->getType().getNonReferenceType();
4860         if (Pair.first->getType()->isLValueReferenceType())
4861           VDType = CGF.getContext().getPointerType(VDType);
4862         if (isAllocatableDecl(Pair.first)) {
4863           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4864           Address Replacement(
4865               Ptr,
4866               CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4867               CGF.getPointerAlign());
4868           Pair.second.first = Replacement;
4869           Ptr = CGF.Builder.CreateLoad(Replacement);
4870           Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4871                                 CGF.getContext().getDeclAlign(Pair.first));
4872           Pair.second.second = Replacement;
4873         } else {
4874           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4875           Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4876                               CGF.getContext().getDeclAlign(Pair.first));
4877           Pair.second.first = Replacement;
4878         }
4879       }
4880     }
4881     if (Data.Reductions) {
4882       OMPPrivateScope FirstprivateScope(CGF);
4883       for (const auto &Pair : FirstprivatePtrs) {
4884         Address Replacement(
4885             CGF.Builder.CreateLoad(Pair.second),
4886             CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4887             CGF.getContext().getDeclAlign(Pair.first));
4888         FirstprivateScope.addPrivate(Pair.first, Replacement);
4889       }
4890       (void)FirstprivateScope.Privatize();
4891       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4892       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4893                              Data.ReductionCopies, Data.ReductionOps);
4894       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4895           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4896       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4897         RedCG.emitSharedOrigLValue(CGF, Cnt);
4898         RedCG.emitAggregateType(CGF, Cnt);
4899         // FIXME: This must be removed once the runtime library is fixed.
4900         // Emit required threadprivate variables for
4901         // initializer/combiner/finalizer.
4902         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4903                                                            RedCG, Cnt);
4904         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4905             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4906         Replacement =
4907             Address(CGF.EmitScalarConversion(
4908                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4909                         CGF.getContext().getPointerType(
4910                             Data.ReductionCopies[Cnt]->getType()),
4911                         Data.ReductionCopies[Cnt]->getExprLoc()),
4912                     CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4913                     Replacement.getAlignment());
4914         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4915         Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4916       }
4917     }
4918     // Privatize all private variables except for in_reduction items.
4919     (void)Scope.Privatize();
4920     SmallVector<const Expr *, 4> InRedVars;
4921     SmallVector<const Expr *, 4> InRedPrivs;
4922     SmallVector<const Expr *, 4> InRedOps;
4923     SmallVector<const Expr *, 4> TaskgroupDescriptors;
4924     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4925       auto IPriv = C->privates().begin();
4926       auto IRed = C->reduction_ops().begin();
4927       auto ITD = C->taskgroup_descriptors().begin();
4928       for (const Expr *Ref : C->varlists()) {
4929         InRedVars.emplace_back(Ref);
4930         InRedPrivs.emplace_back(*IPriv);
4931         InRedOps.emplace_back(*IRed);
4932         TaskgroupDescriptors.emplace_back(*ITD);
4933         std::advance(IPriv, 1);
4934         std::advance(IRed, 1);
4935         std::advance(ITD, 1);
4936       }
4937     }
4938     // Privatize in_reduction items here, because taskgroup descriptors must be
4939     // privatized earlier.
4940     OMPPrivateScope InRedScope(CGF);
4941     if (!InRedVars.empty()) {
4942       ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4943       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4944         RedCG.emitSharedOrigLValue(CGF, Cnt);
4945         RedCG.emitAggregateType(CGF, Cnt);
4946         // The taskgroup descriptor variable is always implicit firstprivate and
4947         // privatized already during processing of the firstprivates.
4948         // FIXME: This must be removed once the runtime library is fixed.
4949         // Emit required threadprivate variables for
4950         // initializer/combiner/finalizer.
4951 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 4952 RedCG, Cnt); 4953 llvm::Value *ReductionsPtr; 4954 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 4955 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), 4956 TRExpr->getExprLoc()); 4957 } else { 4958 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4959 } 4960 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 4961 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 4962 Replacement = Address( 4963 CGF.EmitScalarConversion( 4964 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 4965 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 4966 InRedPrivs[Cnt]->getExprLoc()), 4967 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 4968 Replacement.getAlignment()); 4969 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 4970 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 4971 } 4972 } 4973 (void)InRedScope.Privatize(); 4974 4975 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, 4976 UntiedLocalVars); 4977 Action.Enter(CGF); 4978 BodyGen(CGF); 4979 }; 4980 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 4981 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 4982 Data.NumberOfParts); 4983 OMPLexicalScope Scope(*this, S, std::nullopt, 4984 !isOpenMPParallelDirective(S.getDirectiveKind()) && 4985 !isOpenMPSimdDirective(S.getDirectiveKind())); 4986 TaskGen(*this, OutlinedFn, Data); 4987 } 4988 4989 static ImplicitParamDecl * 4990 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 4991 QualType Ty, CapturedDecl *CD, 4992 SourceLocation Loc) { 4993 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 4994 ImplicitParamDecl::Other); 4995 auto *OrigRef = DeclRefExpr::Create( 4996 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 4997 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 4998 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 4999 ImplicitParamDecl::Other); 5000 auto *PrivateRef = DeclRefExpr::Create( 5001 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 5002 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5003 QualType ElemType = C.getBaseElementType(Ty); 5004 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 5005 ImplicitParamDecl::Other); 5006 auto *InitRef = DeclRefExpr::Create( 5007 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 5008 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 5009 PrivateVD->setInitStyle(VarDecl::CInit); 5010 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 5011 InitRef, /*BasePath=*/nullptr, 5012 VK_PRValue, FPOptionsOverride())); 5013 Data.FirstprivateVars.emplace_back(OrigRef); 5014 Data.FirstprivateCopies.emplace_back(PrivateRef); 5015 Data.FirstprivateInits.emplace_back(InitRef); 5016 return OrigVD; 5017 } 5018 5019 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 5020 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 5021 OMPTargetDataInfo &InputInfo) { 5022 // Emit outlined function for task construct. 
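  // Rough shape of what is assembled below (a sketch, not emitted code): the
  // base-pointer, pointer, and size arrays computed for the target region
  // (plus the mapper array, when present) are passed into the task as
  // implicit firstprivate arrays, so the offloading call inside the task
  // body can rebuild InputInfo from the task's private copies.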
5023 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5024 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5025 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5026 auto I = CS->getCapturedDecl()->param_begin(); 5027 auto PartId = std::next(I); 5028 auto TaskT = std::next(I, 4); 5029 OMPTaskDataTy Data; 5030 // The task is not final. 5031 Data.Final.setInt(/*IntVal=*/false); 5032 // Get list of firstprivate variables. 5033 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 5034 auto IRef = C->varlist_begin(); 5035 auto IElemInitRef = C->inits().begin(); 5036 for (auto *IInit : C->private_copies()) { 5037 Data.FirstprivateVars.push_back(*IRef); 5038 Data.FirstprivateCopies.push_back(IInit); 5039 Data.FirstprivateInits.push_back(*IElemInitRef); 5040 ++IRef; 5041 ++IElemInitRef; 5042 } 5043 } 5044 SmallVector<const Expr *, 4> LHSs; 5045 SmallVector<const Expr *, 4> RHSs; 5046 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 5047 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5048 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5049 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5050 Data.ReductionOps.append(C->reduction_ops().begin(), 5051 C->reduction_ops().end()); 5052 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5053 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5054 } 5055 OMPPrivateScope TargetScope(*this); 5056 VarDecl *BPVD = nullptr; 5057 VarDecl *PVD = nullptr; 5058 VarDecl *SVD = nullptr; 5059 VarDecl *MVD = nullptr; 5060 if (InputInfo.NumberOfTargetItems > 0) { 5061 auto *CD = CapturedDecl::Create( 5062 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 5063 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 5064 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( 5065 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, 5066 /*IndexTypeQuals=*/0); 5067 BPVD = createImplicitFirstprivateForType( 5068 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5069 PVD = createImplicitFirstprivateForType( 5070 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5071 QualType SizesType = getContext().getConstantArrayType( 5072 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 5073 ArrSize, nullptr, ArrayType::Normal, 5074 /*IndexTypeQuals=*/0); 5075 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 5076 S.getBeginLoc()); 5077 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray); 5078 TargetScope.addPrivate(PVD, InputInfo.PointersArray); 5079 TargetScope.addPrivate(SVD, InputInfo.SizesArray); 5080 // If there is no user-defined mapper, the mapper array will be nullptr. In 5081 // this case, we don't need to privatize it. 5082 if (!isa_and_nonnull<llvm::ConstantPointerNull>( 5083 InputInfo.MappersArray.getPointer())) { 5084 MVD = createImplicitFirstprivateForType( 5085 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5086 TargetScope.addPrivate(MVD, InputInfo.MappersArray); 5087 } 5088 } 5089 (void)TargetScope.Privatize(); 5090 buildDependences(S, Data); 5091 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, 5092 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 5093 // Set proper addresses for generated private copies. 
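    // As in EmitOMPTaskBasedDirective above, the runtime-generated copy
    // function is called with out-pointers that it fills with the addresses
    // of the firstprivate copies inside the task's privates block; those
    // addresses then replace the originals in 'Scope'.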
5094 OMPPrivateScope Scope(CGF); 5095 if (!Data.FirstprivateVars.empty()) { 5096 enum { PrivatesParam = 2, CopyFnParam = 3 }; 5097 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 5098 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 5099 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 5100 CS->getCapturedDecl()->getParam(PrivatesParam))); 5101 // Map privates. 5102 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 5103 llvm::SmallVector<llvm::Value *, 16> CallArgs; 5104 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 5105 CallArgs.push_back(PrivatesPtr); 5106 ParamTypes.push_back(PrivatesPtr->getType()); 5107 for (const Expr *E : Data.FirstprivateVars) { 5108 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5109 Address PrivatePtr = 5110 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 5111 ".firstpriv.ptr.addr"); 5112 PrivatePtrs.emplace_back(VD, PrivatePtr); 5113 CallArgs.push_back(PrivatePtr.getPointer()); 5114 ParamTypes.push_back(PrivatePtr.getType()); 5115 } 5116 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), 5117 ParamTypes, /*isVarArg=*/false); 5118 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5119 CopyFn, CopyFnTy->getPointerTo()); 5120 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 5121 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 5122 for (const auto &Pair : PrivatePtrs) { 5123 Address Replacement( 5124 CGF.Builder.CreateLoad(Pair.second), 5125 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), 5126 CGF.getContext().getDeclAlign(Pair.first)); 5127 Scope.addPrivate(Pair.first, Replacement); 5128 } 5129 } 5130 CGF.processInReduction(S, Data, CGF, CS, Scope); 5131 if (InputInfo.NumberOfTargetItems > 0) { 5132 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 5133 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 5134 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 5135 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 5136 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 5137 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 5138 // If MVD is nullptr, the mapper array is not privatized 5139 if (MVD) 5140 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( 5141 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); 5142 } 5143 5144 Action.Enter(CGF); 5145 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 5146 BodyGen(CGF); 5147 }; 5148 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5149 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 5150 Data.NumberOfParts); 5151 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 
                                                                   1 : 0);
5152   IntegerLiteral IfCond(getContext(), TrueOrFalse,
5153                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5154                         SourceLocation());
5155   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5156                                       SharedsTy, CapturedStruct, &IfCond, Data);
5157 }
5158 
5159 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5160                                          OMPTaskDataTy &Data,
5161                                          CodeGenFunction &CGF,
5162                                          const CapturedStmt *CS,
5163                                          OMPPrivateScope &Scope) {
5164   if (Data.Reductions) {
5165     OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5166     OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5167     ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5168                            Data.ReductionCopies, Data.ReductionOps);
5169     llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5170         CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5171     for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5172       RedCG.emitSharedOrigLValue(CGF, Cnt);
5173       RedCG.emitAggregateType(CGF, Cnt);
5174       // FIXME: This must be removed once the runtime library is fixed.
5175       // Emit required threadprivate variables for
5176       // initializer/combiner/finalizer.
5177       CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5178                                                          RedCG, Cnt);
5179       Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5180           CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5181       Replacement =
5182           Address(CGF.EmitScalarConversion(
5183                       Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5184                       CGF.getContext().getPointerType(
5185                           Data.ReductionCopies[Cnt]->getType()),
5186                       Data.ReductionCopies[Cnt]->getExprLoc()),
5187                   CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5188                   Replacement.getAlignment());
5189       Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5190       Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5191     }
5192   }
5193   (void)Scope.Privatize();
5194   SmallVector<const Expr *, 4> InRedVars;
5195   SmallVector<const Expr *, 4> InRedPrivs;
5196   SmallVector<const Expr *, 4> InRedOps;
5197   SmallVector<const Expr *, 4> TaskgroupDescriptors;
5198   for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5199     auto IPriv = C->privates().begin();
5200     auto IRed = C->reduction_ops().begin();
5201     auto ITD = C->taskgroup_descriptors().begin();
5202     for (const Expr *Ref : C->varlists()) {
5203       InRedVars.emplace_back(Ref);
5204       InRedPrivs.emplace_back(*IPriv);
5205       InRedOps.emplace_back(*IRed);
5206       TaskgroupDescriptors.emplace_back(*ITD);
5207       std::advance(IPriv, 1);
5208       std::advance(IRed, 1);
5209       std::advance(ITD, 1);
5210     }
5211   }
5212   OMPPrivateScope InRedScope(CGF);
5213   if (!InRedVars.empty()) {
5214     ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5215     for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5216       RedCG.emitSharedOrigLValue(CGF, Cnt);
5217       RedCG.emitAggregateType(CGF, Cnt);
5218       // FIXME: This must be removed once the runtime library is fixed.
5219       // Emit required threadprivate variables for
5220       // initializer/combiner/finalizer.
5221 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5222 RedCG, Cnt); 5223 llvm::Value *ReductionsPtr; 5224 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5225 ReductionsPtr = 5226 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc()); 5227 } else { 5228 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5229 } 5230 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5231 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5232 Replacement = Address( 5233 CGF.EmitScalarConversion( 5234 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 5235 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5236 InRedPrivs[Cnt]->getExprLoc()), 5237 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5238 Replacement.getAlignment()); 5239 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5240 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5241 } 5242 } 5243 (void)InRedScope.Privatize(); 5244 } 5245 5246 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 5247 // Emit outlined function for task construct. 5248 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5249 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5250 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5251 const Expr *IfCond = nullptr; 5252 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 5253 if (C->getNameModifier() == OMPD_unknown || 5254 C->getNameModifier() == OMPD_task) { 5255 IfCond = C->getCondition(); 5256 break; 5257 } 5258 } 5259 5260 OMPTaskDataTy Data; 5261 // Check if we should emit tied or untied task. 5262 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 5263 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 5264 CGF.EmitStmt(CS->getCapturedStmt()); 5265 }; 5266 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 5267 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 5268 const OMPTaskDataTy &Data) { 5269 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 5270 SharedsTy, CapturedStruct, IfCond, 5271 Data); 5272 }; 5273 auto LPCRegion = 5274 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5275 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 5276 } 5277 5278 void CodeGenFunction::EmitOMPTaskyieldDirective( 5279 const OMPTaskyieldDirective &S) { 5280 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 5281 } 5282 5283 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { 5284 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); 5285 Expr *ME = MC ? 
MC->getMessageString() : nullptr; 5286 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); 5287 bool IsFatal = false; 5288 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) 5289 IsFatal = true; 5290 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal); 5291 } 5292 5293 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 5294 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); 5295 } 5296 5297 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 5298 OMPTaskDataTy Data; 5299 // Build list of dependences 5300 buildDependences(S, Data); 5301 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); 5302 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); 5303 } 5304 5305 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { 5306 return T.clauses().empty(); 5307 } 5308 5309 void CodeGenFunction::EmitOMPTaskgroupDirective( 5310 const OMPTaskgroupDirective &S) { 5311 OMPLexicalScope Scope(*this, S, OMPD_unknown); 5312 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) { 5313 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 5314 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 5315 InsertPointTy AllocaIP(AllocaInsertPt->getParent(), 5316 AllocaInsertPt->getIterator()); 5317 5318 auto BodyGenCB = [&, this](InsertPointTy AllocaIP, 5319 InsertPointTy CodeGenIP) { 5320 Builder.restoreIP(CodeGenIP); 5321 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 5322 }; 5323 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 5324 if (!CapturedStmtInfo) 5325 CapturedStmtInfo = &CapStmtInfo; 5326 Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); 5327 return; 5328 } 5329 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5330 Action.Enter(CGF); 5331 if (const Expr *E = S.getReductionRef()) { 5332 SmallVector<const Expr *, 4> LHSs; 5333 SmallVector<const Expr *, 4> RHSs; 5334 OMPTaskDataTy Data; 5335 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 5336 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5337 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5338 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5339 Data.ReductionOps.append(C->reduction_ops().begin(), 5340 C->reduction_ops().end()); 5341 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5342 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5343 } 5344 llvm::Value *ReductionDesc = 5345 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), 5346 LHSs, RHSs, Data); 5347 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5348 CGF.EmitVarDecl(*VD); 5349 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 5350 /*Volatile=*/false, E->getType()); 5351 } 5352 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 5353 }; 5354 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); 5355 } 5356 5357 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 5358 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() 5359 ? 
llvm::AtomicOrdering::NotAtomic 5360 : llvm::AtomicOrdering::AcquireRelease; 5361 CGM.getOpenMPRuntime().emitFlush( 5362 *this, 5363 [&S]() -> ArrayRef<const Expr *> { 5364 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) 5365 return llvm::ArrayRef(FlushClause->varlist_begin(), 5366 FlushClause->varlist_end()); 5367 return std::nullopt; 5368 }(), 5369 S.getBeginLoc(), AO); 5370 } 5371 5372 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { 5373 const auto *DO = S.getSingleClause<OMPDepobjClause>(); 5374 LValue DOLVal = EmitLValue(DO->getDepobj()); 5375 if (const auto *DC = S.getSingleClause<OMPDependClause>()) { 5376 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), 5377 DC->getModifier()); 5378 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); 5379 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( 5380 *this, Dependencies, DC->getBeginLoc()); 5381 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); 5382 return; 5383 } 5384 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { 5385 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); 5386 return; 5387 } 5388 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { 5389 CGM.getOpenMPRuntime().emitUpdateClause( 5390 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); 5391 return; 5392 } 5393 } 5394 5395 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { 5396 if (!OMPParentLoopDirectiveForScan) 5397 return; 5398 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; 5399 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); 5400 SmallVector<const Expr *, 4> Shareds; 5401 SmallVector<const Expr *, 4> Privates; 5402 SmallVector<const Expr *, 4> LHSs; 5403 SmallVector<const Expr *, 4> RHSs; 5404 SmallVector<const Expr *, 4> ReductionOps; 5405 SmallVector<const Expr *, 4> CopyOps; 5406 SmallVector<const Expr *, 4> CopyArrayTemps; 5407 SmallVector<const Expr *, 4> CopyArrayElems; 5408 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { 5409 if (C->getModifier() != OMPC_REDUCTION_inscan) 5410 continue; 5411 Shareds.append(C->varlist_begin(), C->varlist_end()); 5412 Privates.append(C->privates().begin(), C->privates().end()); 5413 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5414 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5415 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 5416 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); 5417 CopyArrayTemps.append(C->copy_array_temps().begin(), 5418 C->copy_array_temps().end()); 5419 CopyArrayElems.append(C->copy_array_elems().begin(), 5420 C->copy_array_elems().end()); 5421 } 5422 if (ParentDir.getDirectiveKind() == OMPD_simd || 5423 (getLangOpts().OpenMPSimd && 5424 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { 5425 // For simd directive and simd-based directives in simd only mode, use the 5426 // following codegen: 5427 // int x = 0; 5428 // #pragma omp simd reduction(inscan, +: x) 5429 // for (..) { 5430 // <first part> 5431 // #pragma omp scan inclusive(x) 5432 // <second part> 5433 // } 5434 // is transformed to: 5435 // int x = 0; 5436 // for (..) { 5437 // int x_priv = 0; 5438 // <first part> 5439 // x = x_priv + x; 5440 // x_priv = x; 5441 // <second part> 5442 // } 5443 // and 5444 // int x = 0; 5445 // #pragma omp simd reduction(inscan, +: x) 5446 // for (..) 
{ 5447 // <first part> 5448 // #pragma omp scan exclusive(x) 5449 // <second part> 5450 // } 5451 // to 5452 // int x = 0; 5453 // for (..) { 5454 // int x_priv = 0; 5455 // <second part> 5456 // int temp = x; 5457 // x = x_priv + x; 5458 // x_priv = temp; 5459 // <first part> 5460 // } 5461 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); 5462 EmitBranch(IsInclusive 5463 ? OMPScanReduce 5464 : BreakContinueStack.back().ContinueBlock.getBlock()); 5465 EmitBlock(OMPScanDispatch); 5466 { 5467 // New scope for correct construction/destruction of temp variables for 5468 // exclusive scan. 5469 LexicalScope Scope(*this, S.getSourceRange()); 5470 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); 5471 EmitBlock(OMPScanReduce); 5472 if (!IsInclusive) { 5473 // Create temp var and copy LHS value to this temp value. 5474 // TMP = LHS; 5475 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5476 const Expr *PrivateExpr = Privates[I]; 5477 const Expr *TempExpr = CopyArrayTemps[I]; 5478 EmitAutoVarDecl( 5479 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); 5480 LValue DestLVal = EmitLValue(TempExpr); 5481 LValue SrcLVal = EmitLValue(LHSs[I]); 5482 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), 5483 SrcLVal.getAddress(*this), 5484 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5485 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), 5486 CopyOps[I]); 5487 } 5488 } 5489 CGM.getOpenMPRuntime().emitReduction( 5490 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 5491 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); 5492 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5493 const Expr *PrivateExpr = Privates[I]; 5494 LValue DestLVal; 5495 LValue SrcLVal; 5496 if (IsInclusive) { 5497 DestLVal = EmitLValue(RHSs[I]); 5498 SrcLVal = EmitLValue(LHSs[I]); 5499 } else { 5500 const Expr *TempExpr = CopyArrayTemps[I]; 5501 DestLVal = EmitLValue(RHSs[I]); 5502 SrcLVal = EmitLValue(TempExpr); 5503 } 5504 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), 5505 SrcLVal.getAddress(*this), 5506 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5507 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), 5508 CopyOps[I]); 5509 } 5510 } 5511 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); 5512 OMPScanExitBlock = IsInclusive 5513 ? BreakContinueStack.back().ContinueBlock.getBlock() 5514 : OMPScanReduce; 5515 EmitBlock(OMPAfterScanBlock); 5516 return; 5517 } 5518 if (!IsInclusive) { 5519 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5520 EmitBlock(OMPScanExitBlock); 5521 } 5522 if (OMPFirstScanLoop) { 5523 // Emit buffer[i] = red; at the end of the input phase. 
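    // Sketch of the buffer-based codegen (illustrative, for an inscan '+'
    // reduction on x): the input-phase loop emitted when OMPFirstScanLoop is
    // set stores each iteration's private value,
    //   buffer[i] = x_priv;
    // and once the buffer has been combined, the scan-phase loop reloads
    //   x_priv = buffer[i];   // buffer[i-1] for 'exclusive'
    // at its entrance (see the !OMPFirstScanLoop block further down).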
5524 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) 5525 .getIterationVariable() 5526 ->IgnoreParenImpCasts(); 5527 LValue IdxLVal = EmitLValue(IVExpr); 5528 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); 5529 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); 5530 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5531 const Expr *PrivateExpr = Privates[I]; 5532 const Expr *OrigExpr = Shareds[I]; 5533 const Expr *CopyArrayElem = CopyArrayElems[I]; 5534 OpaqueValueMapping IdxMapping( 5535 *this, 5536 cast<OpaqueValueExpr>( 5537 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 5538 RValue::get(IdxVal)); 5539 LValue DestLVal = EmitLValue(CopyArrayElem); 5540 LValue SrcLVal = EmitLValue(OrigExpr); 5541 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), 5542 SrcLVal.getAddress(*this), 5543 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5544 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), 5545 CopyOps[I]); 5546 } 5547 } 5548 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5549 if (IsInclusive) { 5550 EmitBlock(OMPScanExitBlock); 5551 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5552 } 5553 EmitBlock(OMPScanDispatch); 5554 if (!OMPFirstScanLoop) { 5555 // Emit red = buffer[i]; at the entrance to the scan phase. 5556 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) 5557 .getIterationVariable() 5558 ->IgnoreParenImpCasts(); 5559 LValue IdxLVal = EmitLValue(IVExpr); 5560 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); 5561 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); 5562 llvm::BasicBlock *ExclusiveExitBB = nullptr; 5563 if (!IsInclusive) { 5564 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); 5565 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); 5566 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); 5567 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); 5568 EmitBlock(ContBB); 5569 // Use idx - 1 iteration for exclusive scan. 5570 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); 5571 } 5572 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5573 const Expr *PrivateExpr = Privates[I]; 5574 const Expr *OrigExpr = Shareds[I]; 5575 const Expr *CopyArrayElem = CopyArrayElems[I]; 5576 OpaqueValueMapping IdxMapping( 5577 *this, 5578 cast<OpaqueValueExpr>( 5579 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 5580 RValue::get(IdxVal)); 5581 LValue SrcLVal = EmitLValue(CopyArrayElem); 5582 LValue DestLVal = EmitLValue(OrigExpr); 5583 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), 5584 SrcLVal.getAddress(*this), 5585 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5586 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), 5587 CopyOps[I]); 5588 } 5589 if (!IsInclusive) { 5590 EmitBlock(ExclusiveExitBB); 5591 } 5592 } 5593 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock 5594 : OMPAfterScanBlock); 5595 EmitBlock(OMPAfterScanBlock); 5596 } 5597 5598 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 5599 const CodeGenLoopTy &CodeGenLoop, 5600 Expr *IncExpr) { 5601 // Emit the loop iteration variable. 5602 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 5603 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 5604 EmitVarDecl(*IVDecl); 5605 5606 // Emit the iterations count variable. 
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
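      // For example (illustrative):
      //   #pragma omp distribute dist_schedule(static, 4)
      // yields ScheduleKind == OMPC_DIST_SCHEDULE_static and Chunk holding
      // the value 4 converted to the iteration variable's type; without the
      // clause the runtime's default schedule and chunk are used instead.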
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //   while (IV <= GlobalUB) {
        //     <CodeGen rest of pragma>(LB, UB);
        //     LB += ST;
        //     UB += ST;
        //     UB = min(UB, GlobalUB);
        //     IV = LB;
        //   }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

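// Outline the captured statement of an 'ordered' region that carries the
// 'simd' clause into a separate function; the callers below then invoke it
// (directly or via emitOutlinedFunctionCall) with the captured variables as
// arguments.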
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

template <typename T>
static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
                          llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                          llvm::OpenMPIRBuilder &OMPBuilder) {
  unsigned NumLoops = C->getNumLoops();
  QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
      /*DestWidth=*/64, /*Signed=*/1);
  llvm::SmallVector<llvm::Value *> StoreValues;
  for (unsigned I = 0; I < NumLoops; I++) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *StoreValue = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    StoreValues.emplace_back(StoreValue);
  }
  OMPDoacrossKind<T> ODK;
  bool IsDependSource = ODK.isSource(C);
  CGF.Builder.restoreIP(
      OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
                                     StoreValues, ".cnt.addr", IsDependSource));
}

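// Lowering of the 'ordered' directive. For doacross loops the depend and
// doacross clauses map roughly to runtime calls (illustrative):
//   #pragma omp ordered depend(source)    -> __kmpc_doacross_post(<loop idx>)
//   #pragma omp ordered depend(sink: i-1) -> __kmpc_doacross_wait(<i-1>)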
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with a depend or doacross clause.
      assert(!S.hasAssociatedStmt() &&
             "An 'ordered' construct with 'depend' or 'doacross' clauses must "
             "not have an associated statement.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "An 'ordered' construct with a 'depend' clause must not have an "
           "associated statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "An 'ordered' construct with a 'doacross' clause must not have an "
           "associated statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

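// Helpers converting the value read from or stored to 'x' between the source
// and destination types of an 'atomic' construct. For example (illustrative),
// 'v = x;' with 'double v' and 'int x' needs an int-to-double conversion of
// the atomically loaded value before the plain store to 'v'.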
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

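// For example (illustrative):
//   #pragma omp atomic read acquire
//   v = x;
// emits an atomic acquire load of 'x', the acquire flush required by the
// rules cited in the function body, and a plain store of the converted
// result into 'v'.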
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for
  // 'x' expression is simple and atomic is allowed for the given type for the
  // target platform.
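  // For example (illustrative): for 'int x', 'x += 1;' satisfies these checks
  // and becomes a single 'atomicrmw add', while 'x = x * 2;' does not (mul
  // has no atomicrmw form here) and falls back to the compare-and-swap update
  // loop in EmitOMPAtomicSimpleUpdateExpr.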
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress(CGF).getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                         X.getAddress(CGF).getElementType());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

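// For example (illustrative):
//   #pragma omp atomic update
//   x = expr - x;
// has 'x' on the RHS of a non-commutative operator (IsXLHSInRHSPart ==
// false), so emitOMPAtomicRMW refuses it and the generic compare-and-swap
// update path via CommonGen is used instead.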
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

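// For example (illustrative):
//   #pragma omp atomic capture
//   { v = x; x += expr; }   // postfix form: 'v' gets the old value of 'x'
//   { x += expr; v = x; }   // prefix form:  'v' gets the new value of 'x'
// IsPostfixUpdate distinguishes the two cases below.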
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry
    // to the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the
    // atomic operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}

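// Lowering for 'atomic compare' via the OpenMPIRBuilder. For example
// (illustrative):
//   #pragma omp atomic compare
//   x = expr < x ? expr : x;
// maps BO_LT to an atomic MIN operation, while an equality test such as
//   x = x == e ? d : x;
// maps to an atomic compare-and-exchange.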
static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO, const Expr *X,
                                     const Expr *V, const Expr *R,
                                     const Expr *E, const Expr *D,
                                     const Expr *CE, bool IsXBinopExpr,
                                     bool IsPostfixUpdate, bool IsFailOnly,
                                     SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress(CGF);

  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
    EVal = CGF.Builder.CreateIntCast(
        CI, XLVal.getAddress(CGF).getElementType(),
        E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
      DVal = CGF.Builder.CreateIntCast(
          CI, XLVal.getAddress(CGF).getElementType(),
          D->getType()->hasSignedIntegerRepresentation());

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.getPointer(), XAddr.getElementType(),
      X->getType()->hasSignedIntegerRepresentation(),
      X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(V);
    Address Addr = LV.getAddress(CGF);
    VOpVal = {Addr.getPointer(), Addr.getElementType(),
              V->getType()->hasSignedIntegerRepresentation(),
              V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(R);
    Address Addr = LV.getAddress(CGF);
    ROpVal = {Addr.getPointer(), Addr.getElementType(),
              R->getType()->hasSignedIntegerRepresentation(),
              R->getType().isVolatileQualified()};
  }

  CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
      CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
      IsPostfixUpdate, IsFailOnly));
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *R,
                              const Expr *E, const Expr *UE, const Expr *D,
                              const Expr *CE, bool IsXLHSInRHSPart,
                              bool IsFailOnly, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_compare: {
    emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart,
                             IsPostfixUpdate, IsFailOnly, Loc);
    break;
  }
  default:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

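// Entry point for '#pragma omp atomic'. The memory-order clause (if any)
// selects AO; otherwise the default ordering from a 'requires
// atomic_default_mem_order' directive applies. For example (illustrative),
// with acq_rel as the default, 'update' and 'write' fall back to release and
// 'read' to acquire, as required by the OpenMP rules below.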
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                    S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(),
                    S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(),
                    S.getBeginLoc());
}

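// Common lowering for all 'target' directives. On the device side the region
// is emitted inline; on the host an outlined kernel is created and launched
// through the offload runtime, roughly (illustrative, entry-point names vary
// across runtime versions):
//   if (offload entry && if-clause does not fold to false)
//     __tgt_target_kernel(..., FnID, args...); // with host fallback Fn(...)
//   else
//     Fn(args...); // plain host execution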
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
    CGM.getDiags().Report(DiagID);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

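// Common lowering for 'teams': the region is outlined and launched through
// the runtime, roughly (illustrative):
//   __kmpc_push_num_teams(..., NumTeams, ThreadLimit); // if clauses present
//   __kmpc_fork_teams(..., OutlinedFn, <captured vars>);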
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build list and emit dependences.
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.getPointer();
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause is used separately in OMPInteropDirective.");

  if (const auto *C = S.getSingleClause<OMPInitClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
    if (C->getIsTarget()) {
      InteropType = llvm::omp::OMPInteropType::Target;
    } else {
      assert(C->getIsTargetSync() &&
             "Expected interop-type target/targetsync");
      InteropType = llvm::omp::OMPInteropType::TargetSync;
    }
    OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
                                    NumDependences, DependenceList,
                                    Data.HasNowaitClause);
  } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                       NumDependences, DependenceList,
                                       Data.HasNowaitClause);
  } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                   NumDependences, DependenceList,
                                   Data.HasNowaitClause);
  }
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
7037 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7038 PrePostActionTy &Action) { 7039 Action.Enter(CGF); 7040 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7041 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7042 (void)PrivateScope.Privatize(); 7043 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7044 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7045 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7046 }; 7047 7048 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, 7049 CodeGenTeams); 7050 emitPostUpdateForReductionClause(CGF, S, 7051 [](CodeGenFunction &) { return nullptr; }); 7052 } 7053 7054 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7055 CodeGenModule &CGM, StringRef ParentName, 7056 const OMPTargetTeamsDistributeParallelForDirective &S) { 7057 // Emit SPMD target teams distribute parallel for region as a standalone 7058 // region. 7059 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7060 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 7061 }; 7062 llvm::Function *Fn; 7063 llvm::Constant *Addr; 7064 // Emit target region as a standalone region. 7065 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7066 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7067 assert(Fn && Addr && "Target device function emission failed."); 7068 } 7069 7070 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 7071 const OMPTargetTeamsDistributeParallelForDirective &S) { 7072 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7073 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 7074 }; 7075 emitCommonOMPTargetDirective(*this, S, CodeGen); 7076 } 7077 7078 static void emitTargetTeamsDistributeParallelForSimdRegion( 7079 CodeGenFunction &CGF, 7080 const OMPTargetTeamsDistributeParallelForSimdDirective &S, 7081 PrePostActionTy &Action) { 7082 Action.Enter(CGF); 7083 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7084 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7085 S.getDistInc()); 7086 }; 7087 7088 // Emit teams region as a standalone region. 7089 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7090 PrePostActionTy &Action) { 7091 Action.Enter(CGF); 7092 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7093 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7094 (void)PrivateScope.Privatize(); 7095 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7096 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7097 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7098 }; 7099 7100 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, 7101 CodeGenTeams); 7102 emitPostUpdateForReductionClause(CGF, S, 7103 [](CodeGenFunction &) { return nullptr; }); 7104 } 7105 7106 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 7107 CodeGenModule &CGM, StringRef ParentName, 7108 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 7109 // Emit SPMD target teams distribute parallel for simd region as a standalone 7110 // region. 7111 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7112 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 7113 }; 7114 llvm::Function *Fn; 7115 llvm::Constant *Addr; 7116 // Emit target region as a standalone region. 
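// The call below asks the OpenMP runtime to outline the region into a
// device function: Fn receives the generated function and Addr the constant
// used as the offload entry address; the assert that follows checks both.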
7117   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7118       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7119   assert(Fn && Addr && "Target device function emission failed.");
7120 }
7121
7122 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7123     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7124   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7125     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7126   };
7127   emitCommonOMPTargetDirective(*this, S, CodeGen);
7128 }
7129
7130 void CodeGenFunction::EmitOMPCancellationPointDirective(
7131     const OMPCancellationPointDirective &S) {
7132   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7133                                                    S.getCancelRegion());
7134 }
7135
7136 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7137   const Expr *IfCond = nullptr;
7138   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7139     if (C->getNameModifier() == OMPD_unknown ||
7140         C->getNameModifier() == OMPD_cancel) {
7141       IfCond = C->getCondition();
7142       break;
7143     }
7144   }
7145   if (CGM.getLangOpts().OpenMPIRBuilder) {
7146     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7147     // TODO: This check is necessary as we only generate `omp parallel` through
7148     // the OpenMPIRBuilder for now.
7149     if (S.getCancelRegion() == OMPD_parallel ||
7150         S.getCancelRegion() == OMPD_sections ||
7151         S.getCancelRegion() == OMPD_section) {
7152       llvm::Value *IfCondition = nullptr;
7153       if (IfCond)
7154         IfCondition = EmitScalarExpr(IfCond,
7155                                      /*IgnoreResultAssign=*/true);
7156       return Builder.restoreIP(
7157           OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7158     }
7159   }
7160
7161   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7162                                         S.getCancelRegion());
7163 }
7164
7165 CodeGenFunction::JumpDest
7166 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7167   if (Kind == OMPD_parallel || Kind == OMPD_task ||
7168       Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7169       Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7170     return ReturnBlock;
7171   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7172          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7173          Kind == OMPD_distribute_parallel_for ||
7174          Kind == OMPD_target_parallel_for ||
7175          Kind == OMPD_teams_distribute_parallel_for ||
7176          Kind == OMPD_target_teams_distribute_parallel_for);
7177   return OMPCancelStack.getExitBlock();
7178 }
7179
7180 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7181     const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7182     const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7183         CaptureDeviceAddrMap) {
7184   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7185   for (const Expr *OrigVarIt : C.varlists()) {
7186     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7187     if (!Processed.insert(OrigVD).second)
7188       continue;
7189
7190     // In order to identify the right initializer we need to match the
7191     // declaration used by the mapping logic. In some cases we may get an
7192     // OMPCapturedExprDecl that refers to the original declaration.
7193     const ValueDecl *MatchingVD = OrigVD;
7194     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7195       // OMPCapturedExprDecls are used to privatize fields of the current
7196       // structure.
7197       const auto *ME = cast<MemberExpr>(OED->getInit());
7198       assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7199              "Base should be the current struct!");
7200       MatchingVD = ME->getMemberDecl();
7201     }
7202
7203     // If we don't have information about the current list item, move on to
7204     // the next one.
7205     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7206     if (InitAddrIt == CaptureDeviceAddrMap.end())
7207       continue;
7208
7209     llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7210
7211     // Return the address of the private variable.
7212     bool IsRegistered = PrivateScope.addPrivate(
7213         OrigVD,
7214         Address(InitAddrIt->second, Ty,
7215                 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7216     assert(IsRegistered && "firstprivate var already registered as private");
7217     // Silence the warning about unused variable.
7218     (void)IsRegistered;
7219   }
7220 }
7221
7222 static const VarDecl *getBaseDecl(const Expr *Ref) {
7223   const Expr *Base = Ref->IgnoreParenImpCasts();
7224   while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
7225     Base = OASE->getBase()->IgnoreParenImpCasts();
7226   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7227     Base = ASE->getBase()->IgnoreParenImpCasts();
7228   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7229 }
7230
7231 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7232     const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7233     const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7234         CaptureDeviceAddrMap) {
7235   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7236   for (const Expr *Ref : C.varlists()) {
7237     const VarDecl *OrigVD = getBaseDecl(Ref);
7238     if (!Processed.insert(OrigVD).second)
7239       continue;
7240     // In order to identify the right initializer we need to match the
7241     // declaration used by the mapping logic. In some cases we may get an
7242     // OMPCapturedExprDecl that refers to the original declaration.
7243     const ValueDecl *MatchingVD = OrigVD;
7244     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7245       // OMPCapturedExprDecls are used to privatize fields of the current
7246       // structure.
7247       const auto *ME = cast<MemberExpr>(OED->getInit());
7248       assert(isa<CXXThisExpr>(ME->getBase()) &&
7249              "Base should be the current struct!");
7250       MatchingVD = ME->getMemberDecl();
7251     }
7252
7253     // If we don't have information about the current list item, move on to
7254     // the next one.
7255     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7256     if (InitAddrIt == CaptureDeviceAddrMap.end())
7257       continue;
7258
7259     llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7260
7261     Address PrivAddr =
7262         Address(InitAddrIt->second, Ty,
7263                 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
7264     // For declrefs and variable length arrays we need to load the pointer for
7265     // correct mapping, since the pointer to the data was passed to the runtime.
7266     if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7267         MatchingVD->getType()->isArrayType()) {
7268       QualType PtrTy = getContext().getPointerType(
7269           OrigVD->getType().getNonReferenceType());
7270       PrivAddr =
7271           EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
7272                             PtrTy->castAs<PointerType>());
7273     }
7274
7275     (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7276   }
7277 }
7278
7279 // Generate the instructions for '#pragma omp target data' directive.
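// For example (illustrative only; launch_on_device is a hypothetical
// helper, not part of this file):
//
//   #pragma omp target data map(tofrom : A[0:N]) use_device_ptr(A)
//   {
//     launch_on_device(A); // Inside the region, A is the device pointer.
//   }
//
// The use_device_ptr/use_device_addr clauses privatize the listed variables
// so that, within the region, they refer to the corresponding device
// addresses.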
7280 void CodeGenFunction::EmitOMPTargetDataDirective(
7281     const OMPTargetDataDirective &S) {
7282   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7283                                        /*SeparateBeginEndCalls=*/true);
7284
7285   // Create a pre/post action to signal the privatization of the device pointer.
7286   // This action can be replaced by the OpenMP runtime code generation to
7287   // deactivate privatization.
7288   bool PrivatizeDevicePointers = false;
7289   class DevicePointerPrivActionTy : public PrePostActionTy {
7290     bool &PrivatizeDevicePointers;
7291
7292   public:
7293     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7294         : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7295     void Enter(CodeGenFunction &CGF) override {
7296       PrivatizeDevicePointers = true;
7297     }
7298   };
7299   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7300
7301   auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7302     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7303       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7304     };
7305
7306     // Codegen that selects whether to generate the privatization code or not.
7307     auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7308       RegionCodeGenTy RCG(InnermostCodeGen);
7309       PrivatizeDevicePointers = false;
7310
7311       // Call the pre-action to change the status of PrivatizeDevicePointers if
7312       // needed.
7313       Action.Enter(CGF);
7314
7315       if (PrivatizeDevicePointers) {
7316         OMPPrivateScope PrivateScope(CGF);
7317         // Emit all instances of the use_device_ptr clause.
7318         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7319           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7320                                         Info.CaptureDeviceAddrMap);
7321         for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7322           CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7323                                          Info.CaptureDeviceAddrMap);
7324         (void)PrivateScope.Privatize();
7325         RCG(CGF);
7326       } else {
7327         // If we don't have target devices, don't bother emitting the data
7328         // mapping code.
7329         std::optional<OpenMPDirectiveKind> CaptureRegion;
7330         if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7331           // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7332           for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7333             for (const Expr *E : C->varlists()) {
7334               const Decl *D = cast<DeclRefExpr>(E)->getDecl();
7335               if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7336                 CGF.EmitVarDecl(*OED);
7337             }
7338           for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7339             for (const Expr *E : C->varlists()) {
7340               const Decl *D = getBaseDecl(E);
7341               if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7342                 CGF.EmitVarDecl(*OED);
7343             }
7344         } else {
7345           CaptureRegion = OMPD_unknown;
7346         }
7347
7348         OMPLexicalScope Scope(CGF, S, CaptureRegion);
7349         RCG(CGF);
7350       }
7351     };
7352
7353     // Forward the provided action to the privatization codegen.
7354     RegionCodeGenTy PrivRCG(PrivCodeGen);
7355     PrivRCG.setAction(Action);
7356
7357     // Although the body of the region is emitted as an inlined directive, we
7358     // don't use an inline scope, because changes to the references inside the
7359     // region are expected to be visible outside, so we do not privatize them.
7360 OMPLexicalScope Scope(CGF, S); 7361 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 7362 PrivRCG); 7363 }; 7364 7365 RegionCodeGenTy RCG(CodeGen); 7366 7367 // If we don't have target devices, don't bother emitting the data mapping 7368 // code. 7369 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 7370 RCG(*this); 7371 return; 7372 } 7373 7374 // Check if we have any if clause associated with the directive. 7375 const Expr *IfCond = nullptr; 7376 if (const auto *C = S.getSingleClause<OMPIfClause>()) 7377 IfCond = C->getCondition(); 7378 7379 // Check if we have any device clause associated with the directive. 7380 const Expr *Device = nullptr; 7381 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 7382 Device = C->getDevice(); 7383 7384 // Set the action to signal privatization of device pointers. 7385 RCG.setAction(PrivAction); 7386 7387 // Emit region code. 7388 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 7389 Info); 7390 } 7391 7392 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 7393 const OMPTargetEnterDataDirective &S) { 7394 // If we don't have target devices, don't bother emitting the data mapping 7395 // code. 7396 if (CGM.getLangOpts().OMPTargetTriples.empty()) 7397 return; 7398 7399 // Check if we have any if clause associated with the directive. 7400 const Expr *IfCond = nullptr; 7401 if (const auto *C = S.getSingleClause<OMPIfClause>()) 7402 IfCond = C->getCondition(); 7403 7404 // Check if we have any device clause associated with the directive. 7405 const Expr *Device = nullptr; 7406 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 7407 Device = C->getDevice(); 7408 7409 OMPLexicalScope Scope(*this, S, OMPD_task); 7410 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 7411 } 7412 7413 void CodeGenFunction::EmitOMPTargetExitDataDirective( 7414 const OMPTargetExitDataDirective &S) { 7415 // If we don't have target devices, don't bother emitting the data mapping 7416 // code. 7417 if (CGM.getLangOpts().OMPTargetTriples.empty()) 7418 return; 7419 7420 // Check if we have any if clause associated with the directive. 7421 const Expr *IfCond = nullptr; 7422 if (const auto *C = S.getSingleClause<OMPIfClause>()) 7423 IfCond = C->getCondition(); 7424 7425 // Check if we have any device clause associated with the directive. 7426 const Expr *Device = nullptr; 7427 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 7428 Device = C->getDevice(); 7429 7430 OMPLexicalScope Scope(*this, S, OMPD_task); 7431 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 7432 } 7433 7434 static void emitTargetParallelRegion(CodeGenFunction &CGF, 7435 const OMPTargetParallelDirective &S, 7436 PrePostActionTy &Action) { 7437 // Get the captured statement associated with the 'parallel' region. 7438 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 7439 Action.Enter(CGF); 7440 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 7441 Action.Enter(CGF); 7442 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7443 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 7444 CGF.EmitOMPPrivateClause(S, PrivateScope); 7445 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7446 (void)PrivateScope.Privatize(); 7447 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 7448 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 7449 // TODO: Add support for clauses. 
7450 CGF.EmitStmt(CS->getCapturedStmt()); 7451 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 7452 }; 7453 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 7454 emitEmptyBoundParameters); 7455 emitPostUpdateForReductionClause(CGF, S, 7456 [](CodeGenFunction &) { return nullptr; }); 7457 } 7458 7459 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7460 CodeGenModule &CGM, StringRef ParentName, 7461 const OMPTargetParallelDirective &S) { 7462 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7463 emitTargetParallelRegion(CGF, S, Action); 7464 }; 7465 llvm::Function *Fn; 7466 llvm::Constant *Addr; 7467 // Emit target region as a standalone region. 7468 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7469 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7470 assert(Fn && Addr && "Target device function emission failed."); 7471 } 7472 7473 void CodeGenFunction::EmitOMPTargetParallelDirective( 7474 const OMPTargetParallelDirective &S) { 7475 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7476 emitTargetParallelRegion(CGF, S, Action); 7477 }; 7478 emitCommonOMPTargetDirective(*this, S, CodeGen); 7479 } 7480 7481 static void emitTargetParallelForRegion(CodeGenFunction &CGF, 7482 const OMPTargetParallelForDirective &S, 7483 PrePostActionTy &Action) { 7484 Action.Enter(CGF); 7485 // Emit directive as a combined directive that consists of two implicit 7486 // directives: 'parallel' with 'for' directive. 7487 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7488 Action.Enter(CGF); 7489 CodeGenFunction::OMPCancelStackRAII CancelRegion( 7490 CGF, OMPD_target_parallel_for, S.hasCancel()); 7491 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 7492 emitDispatchForLoopBounds); 7493 }; 7494 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 7495 emitEmptyBoundParameters); 7496 } 7497 7498 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 7499 CodeGenModule &CGM, StringRef ParentName, 7500 const OMPTargetParallelForDirective &S) { 7501 // Emit SPMD target parallel for region as a standalone region. 7502 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7503 emitTargetParallelForRegion(CGF, S, Action); 7504 }; 7505 llvm::Function *Fn; 7506 llvm::Constant *Addr; 7507 // Emit target region as a standalone region. 7508 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7509 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7510 assert(Fn && Addr && "Target device function emission failed."); 7511 } 7512 7513 void CodeGenFunction::EmitOMPTargetParallelForDirective( 7514 const OMPTargetParallelForDirective &S) { 7515 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7516 emitTargetParallelForRegion(CGF, S, Action); 7517 }; 7518 emitCommonOMPTargetDirective(*this, S, CodeGen); 7519 } 7520 7521 static void 7522 emitTargetParallelForSimdRegion(CodeGenFunction &CGF, 7523 const OMPTargetParallelForSimdDirective &S, 7524 PrePostActionTy &Action) { 7525 Action.Enter(CGF); 7526 // Emit directive as a combined directive that consists of two implicit 7527 // directives: 'parallel' with 'for' directive. 
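// For instance (illustrative only, not taken from this file):
//
//   #pragma omp target parallel for simd
//   for (int I = 0; I < N; ++I)
//     A[I] = B[I] * C;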
7528   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7529     Action.Enter(CGF);
7530     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7531                                emitDispatchForLoopBounds);
7532   };
7533   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7534                                  emitEmptyBoundParameters);
7535 }
7536
7537 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7538     CodeGenModule &CGM, StringRef ParentName,
7539     const OMPTargetParallelForSimdDirective &S) {
7540   // Emit SPMD target parallel for simd region as a standalone region.
7541   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7542     emitTargetParallelForSimdRegion(CGF, S, Action);
7543   };
7544   llvm::Function *Fn;
7545   llvm::Constant *Addr;
7546   // Emit target region as a standalone region.
7547   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7548       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7549   assert(Fn && Addr && "Target device function emission failed.");
7550 }
7551
7552 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7553     const OMPTargetParallelForSimdDirective &S) {
7554   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7555     emitTargetParallelForSimdRegion(CGF, S, Action);
7556   };
7557   emitCommonOMPTargetDirective(*this, S, CodeGen);
7558 }
7559
7560 /// Map a loop helper variable to the address of the corresponding implicit parameter.
7561 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7562                      const ImplicitParamDecl *PVD,
7563                      CodeGenFunction::OMPPrivateScope &Privates) {
7564   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7565   Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7566 }
7567
7568 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7569   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7570   // Emit outlined function for task construct.
7571   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7572   Address CapturedStruct = Address::invalid();
7573   {
7574     OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7575     CapturedStruct = GenerateCapturedStmtArgument(*CS);
7576   }
7577   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7578   const Expr *IfCond = nullptr;
7579   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7580     if (C->getNameModifier() == OMPD_unknown ||
7581         C->getNameModifier() == OMPD_taskloop) {
7582       IfCond = C->getCondition();
7583       break;
7584     }
7585   }
7586
7587   OMPTaskDataTy Data;
7588   // Check if taskloop must be emitted without taskgroup.
7589   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7590   // TODO: Check if we should emit tied or untied task.
7591   Data.Tied = true;
7592   // Set scheduling for taskloop.
7593   if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7594     // grainsize clause
7595     Data.Schedule.setInt(/*IntVal=*/false);
7596     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7597   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7598     // num_tasks clause
7599     Data.Schedule.setInt(/*IntVal=*/true);
7600     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7601   }
7602
7603   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7604     // if (PreCond) {
7605     //   for (IV in 0..LastIteration) BODY;
7606     //   <Final counter/linear vars updates>;
7607     // }
7608     //
7609
7610     // Emit: if (PreCond) - begin.
7611     // If the condition constant folds and can be elided, avoid emitting the
7612     // whole loop.
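// For example, a taskloop over 'for (int I = 0; I < 0; ++I)' has a
// precondition that folds to false, so no loop code is emitted at all.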
7613 bool CondConstant; 7614 llvm::BasicBlock *ContBlock = nullptr; 7615 OMPLoopScope PreInitScope(CGF, S); 7616 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 7617 if (!CondConstant) 7618 return; 7619 } else { 7620 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 7621 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 7622 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 7623 CGF.getProfileCount(&S)); 7624 CGF.EmitBlock(ThenBlock); 7625 CGF.incrementProfileCounter(&S); 7626 } 7627 7628 (void)CGF.EmitOMPLinearClauseInit(S); 7629 7630 OMPPrivateScope LoopScope(CGF); 7631 // Emit helper vars inits. 7632 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 7633 auto *I = CS->getCapturedDecl()->param_begin(); 7634 auto *LBP = std::next(I, LowerBound); 7635 auto *UBP = std::next(I, UpperBound); 7636 auto *STP = std::next(I, Stride); 7637 auto *LIP = std::next(I, LastIter); 7638 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 7639 LoopScope); 7640 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 7641 LoopScope); 7642 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 7643 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 7644 LoopScope); 7645 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 7646 CGF.EmitOMPLinearClause(S, LoopScope); 7647 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 7648 (void)LoopScope.Privatize(); 7649 // Emit the loop iteration variable. 7650 const Expr *IVExpr = S.getIterationVariable(); 7651 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 7652 CGF.EmitVarDecl(*IVDecl); 7653 CGF.EmitIgnoredExpr(S.getInit()); 7654 7655 // Emit the iterations count variable. 7656 // If it is not a variable, Sema decided to calculate iterations count on 7657 // each iteration (e.g., it is foldable into a constant). 7658 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 7659 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 7660 // Emit calculation of the iterations count. 7661 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 7662 } 7663 7664 { 7665 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); 7666 emitCommonSimdLoop( 7667 CGF, S, 7668 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7669 if (isOpenMPSimdDirective(S.getDirectiveKind())) 7670 CGF.EmitOMPSimdInit(S); 7671 }, 7672 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 7673 CGF.EmitOMPInnerLoop( 7674 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 7675 [&S](CodeGenFunction &CGF) { 7676 emitOMPLoopBodyWithStopPoint(CGF, S, 7677 CodeGenFunction::JumpDest()); 7678 }, 7679 [](CodeGenFunction &) {}); 7680 }); 7681 } 7682 // Emit: if (PreCond) - end. 7683 if (ContBlock) { 7684 CGF.EmitBranch(ContBlock); 7685 CGF.EmitBlock(ContBlock, true); 7686 } 7687 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
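// (IsLastIter is expected to be the flag the taskloop runtime sets for the
// task executing the last iteration; it is loaded from the LastIter
// implicit parameter below.)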
7688 if (HasLastprivateClause) { 7689 CGF.EmitOMPLastprivateClauseFinal( 7690 S, isOpenMPSimdDirective(S.getDirectiveKind()), 7691 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 7692 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 7693 (*LIP)->getType(), S.getBeginLoc()))); 7694 } 7695 LoopScope.restoreMap(); 7696 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { 7697 return CGF.Builder.CreateIsNotNull( 7698 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 7699 (*LIP)->getType(), S.getBeginLoc())); 7700 }); 7701 }; 7702 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 7703 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 7704 const OMPTaskDataTy &Data) { 7705 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, 7706 &Data](CodeGenFunction &CGF, PrePostActionTy &) { 7707 OMPLoopScope PreInitScope(CGF, S); 7708 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, 7709 OutlinedFn, SharedsTy, 7710 CapturedStruct, IfCond, Data); 7711 }; 7712 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 7713 CodeGen); 7714 }; 7715 if (Data.Nogroup) { 7716 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); 7717 } else { 7718 CGM.getOpenMPRuntime().emitTaskgroupRegion( 7719 *this, 7720 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 7721 PrePostActionTy &Action) { 7722 Action.Enter(CGF); 7723 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, 7724 Data); 7725 }, 7726 S.getBeginLoc()); 7727 } 7728 } 7729 7730 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 7731 auto LPCRegion = 7732 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7733 EmitOMPTaskLoopBasedDirective(S); 7734 } 7735 7736 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 7737 const OMPTaskLoopSimdDirective &S) { 7738 auto LPCRegion = 7739 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7740 OMPLexicalScope Scope(*this, S); 7741 EmitOMPTaskLoopBasedDirective(S); 7742 } 7743 7744 void CodeGenFunction::EmitOMPMasterTaskLoopDirective( 7745 const OMPMasterTaskLoopDirective &S) { 7746 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7747 Action.Enter(CGF); 7748 EmitOMPTaskLoopBasedDirective(S); 7749 }; 7750 auto LPCRegion = 7751 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7752 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); 7753 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 7754 } 7755 7756 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( 7757 const OMPMasterTaskLoopSimdDirective &S) { 7758 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7759 Action.Enter(CGF); 7760 EmitOMPTaskLoopBasedDirective(S); 7761 }; 7762 auto LPCRegion = 7763 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7764 OMPLexicalScope Scope(*this, S); 7765 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 7766 } 7767 7768 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( 7769 const OMPParallelMasterTaskLoopDirective &S) { 7770 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7771 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 7772 PrePostActionTy &Action) { 7773 Action.Enter(CGF); 7774 CGF.EmitOMPTaskLoopBasedDirective(S); 7775 }; 7776 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 7777 CGM.getOpenMPRuntime().emitMasterRegion(CGF, 
TaskLoopCodeGen,
7778                                             S.getBeginLoc());
7779   };
7780   auto LPCRegion =
7781       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7782   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7783                                  emitEmptyBoundParameters);
7784 }
7785
7786 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7787     const OMPParallelMasterTaskLoopSimdDirective &S) {
7788   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7789     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7790                                   PrePostActionTy &Action) {
7791       Action.Enter(CGF);
7792       CGF.EmitOMPTaskLoopBasedDirective(S);
7793     };
7794     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7795     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7796                                             S.getBeginLoc());
7797   };
7798   auto LPCRegion =
7799       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7800   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7801                                  emitEmptyBoundParameters);
7802 }
7803
7804 // Generate the instructions for '#pragma omp target update' directive.
7805 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7806     const OMPTargetUpdateDirective &S) {
7807   // If we don't have target devices, don't bother emitting the data mapping
7808   // code.
7809   if (CGM.getLangOpts().OMPTargetTriples.empty())
7810     return;
7811
7812   // Check if we have any if clause associated with the directive.
7813   const Expr *IfCond = nullptr;
7814   if (const auto *C = S.getSingleClause<OMPIfClause>())
7815     IfCond = C->getCondition();
7816
7817   // Check if we have any device clause associated with the directive.
7818   const Expr *Device = nullptr;
7819   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7820     Device = C->getDevice();
7821
7822   OMPLexicalScope Scope(*this, S, OMPD_task);
7823   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7824 }
7825
7826 void CodeGenFunction::EmitOMPGenericLoopDirective(
7827     const OMPGenericLoopDirective &S) {
7828   // Unimplemented; just inline the underlying statement for now.
7829   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7830     // Emit the loop iteration variable.
7831     const Stmt *CS =
7832         cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
7833     const auto *ForS = dyn_cast<ForStmt>(CS);
7834     if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7835       OMPPrivateScope LoopScope(CGF);
7836       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7837       (void)LoopScope.Privatize();
7838       CGF.EmitStmt(CS);
7839       LoopScope.restoreMap();
7840     } else {
7841       CGF.EmitStmt(CS);
7842     }
7843   };
7844   OMPLexicalScope Scope(*this, S, OMPD_unknown);
7845   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7846 }
7847
7848 void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
7849     const OMPLoopDirective &S) {
7850   // Emit the combined directive as if its constituent constructs are
7851   // 'parallel' and 'for'.
7852   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7853     Action.Enter(CGF);
7854     emitOMPCopyinClause(CGF, S);
7855     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
7856   };
7857   {
7858     auto LPCRegion =
7859         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7860     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
7861                                    emitEmptyBoundParameters);
7862   }
7863   // Check for outer lastprivate conditional update.
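// (This is expected to update variables listed in a
// 'lastprivate(conditional:)' clause on an enclosing directive that may
// have been written inside this region.)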
7864   checkForLastprivateConditionalUpdate(*this, S);
7865 }
7866
7867 void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
7868     const OMPTeamsGenericLoopDirective &S) {
7869   // To be consistent with the current behavior of 'target teams loop', emit
7870   // 'teams loop' as if its constituent constructs are 'distribute',
7871   // 'parallel', and 'for'.
7872   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7873     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7874                               S.getDistInc());
7875   };
7876
7877   // Emit teams region as a standalone region.
7878   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7879                                             PrePostActionTy &Action) {
7880     Action.Enter(CGF);
7881     OMPPrivateScope PrivateScope(CGF);
7882     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7883     (void)PrivateScope.Privatize();
7884     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7885                                                     CodeGenDistribute);
7886     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7887   };
7888   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
7889   emitPostUpdateForReductionClause(*this, S,
7890                                    [](CodeGenFunction &) { return nullptr; });
7891 }
7892
7893 static void
7894 emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
7895                                  const OMPTargetTeamsGenericLoopDirective &S,
7896                                  PrePostActionTy &Action) {
7897   Action.Enter(CGF);
7898   // Emit 'teams loop' as if its constituent constructs are 'distribute',
7899   // 'parallel', and 'for'.
7900   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7901     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7902                               S.getDistInc());
7903   };
7904
7905   // Emit teams region as a standalone region.
7906   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7907                                                  PrePostActionTy &Action) {
7908     Action.Enter(CGF);
7909     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7910     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7911     (void)PrivateScope.Privatize();
7912     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7913         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7914     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7915   };
7916
7917   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7918                               CodeGenTeams);
7919   emitPostUpdateForReductionClause(CGF, S,
7920                                    [](CodeGenFunction &) { return nullptr; });
7921 }
7922
7923 /// Emit combined directive 'target teams loop' as if its constituent
7924 /// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
7925 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
7926     const OMPTargetTeamsGenericLoopDirective &S) {
7927   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7928     emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7929   };
7930   emitCommonOMPTargetDirective(*this, S, CodeGen);
7931 }
7932
7933 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
7934     CodeGenModule &CGM, StringRef ParentName,
7935     const OMPTargetTeamsGenericLoopDirective &S) {
7936   // Emit SPMD target teams loop region as a standalone region.
7937   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7938     emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7939   };
7940   llvm::Function *Fn;
7941   llvm::Constant *Addr;
7942   // Emit target region as a standalone region.
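// Illustrative construct lowered here (example only, not taken from this
// file):
//
//   #pragma omp target teams loop
//   for (int I = 0; I < N; ++I)
//     A[I] = I;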
7943 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7944 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7945 assert(Fn && Addr && 7946 "Target device function emission failed for 'target teams loop'."); 7947 } 7948 7949 static void emitTargetParallelGenericLoopRegion( 7950 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, 7951 PrePostActionTy &Action) { 7952 Action.Enter(CGF); 7953 // Emit as 'parallel for'. 7954 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7955 Action.Enter(CGF); 7956 CodeGenFunction::OMPCancelStackRAII CancelRegion( 7957 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); 7958 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 7959 emitDispatchForLoopBounds); 7960 }; 7961 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 7962 emitEmptyBoundParameters); 7963 } 7964 7965 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 7966 CodeGenModule &CGM, StringRef ParentName, 7967 const OMPTargetParallelGenericLoopDirective &S) { 7968 // Emit target parallel loop region as a standalone region. 7969 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7970 emitTargetParallelGenericLoopRegion(CGF, S, Action); 7971 }; 7972 llvm::Function *Fn; 7973 llvm::Constant *Addr; 7974 // Emit target region as a standalone region. 7975 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7976 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7977 assert(Fn && Addr && "Target device function emission failed."); 7978 } 7979 7980 /// Emit combined directive 'target parallel loop' as if its constituent 7981 /// constructs are 'target', 'parallel', and 'for'. 7982 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( 7983 const OMPTargetParallelGenericLoopDirective &S) { 7984 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7985 emitTargetParallelGenericLoopRegion(CGF, S, Action); 7986 }; 7987 emitCommonOMPTargetDirective(*this, S, CodeGen); 7988 } 7989 7990 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 7991 const OMPExecutableDirective &D) { 7992 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { 7993 EmitOMPScanDirective(*SD); 7994 return; 7995 } 7996 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 7997 return; 7998 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 7999 OMPPrivateScope GlobalsScope(CGF); 8000 if (isOpenMPTaskingDirective(D.getDirectiveKind())) { 8001 // Capture global firstprivates to avoid crash. 
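// For example (illustrative only), a file-scope 'int G;' named in
// 'firstprivate(G)' on a task-generating directive has no local alloca, so
// its global lvalue is captured here before privatization.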
8002       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8003         for (const Expr *Ref : C->varlists()) {
8004           const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8005           if (!DRE)
8006             continue;
8007           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8008           if (!VD || VD->hasLocalStorage())
8009             continue;
8010           if (!CGF.LocalDeclMap.count(VD)) {
8011             LValue GlobLVal = CGF.EmitLValue(Ref);
8012             GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8013           }
8014         }
8015       }
8016     }
8017     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8018       (void)GlobalsScope.Privatize();
8019       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8020       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
8021     } else {
8022       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
8023         for (const Expr *E : LD->counters()) {
8024           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8025           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8026             LValue GlobLVal = CGF.EmitLValue(E);
8027             GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8028           }
8029           if (isa<OMPCapturedExprDecl>(VD)) {
8030             // Emit only those that were not explicitly referenced in clauses.
8031             if (!CGF.LocalDeclMap.count(VD))
8032               CGF.EmitVarDecl(*VD);
8033           }
8034         }
8035         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8036           if (!C->getNumForLoops())
8037             continue;
8038           for (unsigned I = LD->getLoopsNumber(),
8039                         E = C->getLoopNumIterations().size();
8040                I < E; ++I) {
8041             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8042                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8043               // Emit only those that were not explicitly referenced in clauses.
8044               if (!CGF.LocalDeclMap.count(VD))
8045                 CGF.EmitVarDecl(*VD);
8046             }
8047           }
8048         }
8049       }
8050       (void)GlobalsScope.Privatize();
8051       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8052     }
8053   };
8054   if (D.getDirectiveKind() == OMPD_atomic ||
8055       D.getDirectiveKind() == OMPD_critical ||
8056       D.getDirectiveKind() == OMPD_section ||
8057       D.getDirectiveKind() == OMPD_master ||
8058       D.getDirectiveKind() == OMPD_masked) {
8059     EmitStmt(D.getAssociatedStmt());
8060   } else {
8061     auto LPCRegion =
8062         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
8063     OMPSimdLexicalScope Scope(*this, D);
8064     CGM.getOpenMPRuntime().emitInlinedDirective(
8065         *this,
8066         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8067                                                     : D.getDirectiveKind(),
8068         CodeGen);
8069   }
8070   // Check for outer lastprivate conditional update.
8071   checkForLastprivateConditionalUpdate(*this, D);
8072 }
8073