//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);
static OpenMPDirectiveKind
getEffectiveDirectiveKind(const OMPExecutableDirective &S);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    return !(isOpenMPTargetExecutionDirective(EKind) ||
             isOpenMPLoopBoundSharingDirective(EKind)) &&
           isOpenMPParallelDirective(EKind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    return !isOpenMPTargetExecutionDirective(EKind) &&
           isOpenMPTeamsDirective(EKind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
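/// Before the loop body is generated, this scope emits the directive's
/// pre-init statements (e.g. bounds computed for loop transformations) and,
/// for C++ range-based loops, the implicit init/__range/__end variables.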
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlist()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(&S)) {
      PreInits = Stripe->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible in the following
      // statements that they initialize, unpack the CompoundStmt they are
      // nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but they need to be
        // emitted here.
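        // Emit each declaration in the DeclStmt directly so that
        // OMPCapturedExprDecls also get storage and an initializer.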
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(getEffectiveDirectiveKind(S)))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

// The loop directive with a bind clause will be mapped to a different
// directive with corresponding semantics.
static OpenMPDirectiveKind
getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
  OpenMPDirectiveKind Kind = S.getDirectiveKind();
  if (Kind != OMPD_loop)
    return Kind;

  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
  if (const auto *C = S.getSingleClause<OMPBindClause>())
    BindKind = C->getBindKind();

  switch (BindKind) {
  case OMPC_BIND_parallel:
    return OMPD_for;
  case OMPC_BIND_teams:
    return OMPD_distribute;
  case OMPC_BIND_thread:
    return OMPD_simd;
  default:
    return OMPD_loop;
  }
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
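/// A single captured statement may be outlined twice: once with uintptr-cast
/// arguments for the runtime and once with the original parameter types for
/// debug info. These options select which variant is produced.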
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, {}, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass the VLA type sizes to the outlined
    // function in the same way.
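    // For example, an 'int X' captured by copy becomes a 'uintptr_t' argument:
    // the caller stores the scalar through a pointer cast (see
    // GenerateOpenMPCapturedVars) and the callee reads it back via
    // castValueFromUintptr below.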
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit a function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit a function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args, WrapperArgs;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
      WrapperLocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
      WrapperVLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();

  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  llvm::Function *WrapperF = nullptr;
  if (NeedWrapperFunction) {
    // Emit the final kernel early to allow attributes to be added by the
    // OpenMPIRBuilder.
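    // The wrapper keeps the uintptr-cast parameters the runtime expects and
    // merely forwards to a "_debug__" variant, which is emitted below with
    // the original parameter types so the body remains debuggable.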
    FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                              /*RegisterCastedArgsOnly=*/true,
                              CapturedStmtInfo->getHelperName(), Loc);
    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
    WrapperF =
        emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                     WrapperCGF.CXXThisValue, WrapperFO);
    Out << "_debug__";
  }
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(
      *this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : WrapperLocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : WrapperVLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO->assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Reverse the order.
  WrapperF->removeFromParent();
  F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);

  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                  OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
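  // Roughly, the emitted loop is:
  //   dest.end = dest.begin + NumElements;
  //   if (dest.begin == dest.end) goto done;
  //   body: CopyGen(dest, src); ++dest; ++src;
  //   if (dest != dest.end) goto body;
  //   done: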
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                           isOpenMPTargetExecutionDirective(EKind);
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlist())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, EKind);
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute, etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First, check whether the current thread is the master thread. If
          // it is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
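    // On the master thread the threadprivate copy aliases the master variable
    // (the address compare above fails), so the copy block is skipped there.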
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
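    // The barrier ensures every thread has published its conditional
    // lastprivate updates before the final values are copied back below.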
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
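      // LHSVD/RHSVD are the placeholder decls used by the reduction combiner
      // expressions; mapping LHS to the shared original and RHS to the private
      // copy makes emitting 'LHS = LHS op RHS' perform the reduction.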
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction = isOpenMPWorksharingDirective(EKind);
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (EKind) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  llvm::SmallVector<bool, 8> IsPrivateVarReduction;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
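    // Inscan reductions are finalized as part of the scan directive codegen
    // rather than by the runtime reduction call emitted here.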
1480 if (C->getModifier() == OMPC_REDUCTION_inscan)
1481 continue;
1482 HasAtLeastOneReduction = true;
1483 Privates.append(C->privates().begin(), C->privates().end());
1484 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1485 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1486 IsPrivateVarReduction.append(C->private_var_reduction_flags().begin(),
1487 C->private_var_reduction_flags().end());
1488 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1489 IsReductionWithTaskMod =
1490 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1491 }
1492 if (HasAtLeastOneReduction) {
1493 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
1494 if (IsReductionWithTaskMod) {
1495 CGM.getOpenMPRuntime().emitTaskReductionFini(
1496 *this, D.getBeginLoc(), isOpenMPWorksharingDirective(EKind));
1497 }
1498 bool TeamsLoopCanBeParallel = false;
1499 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
1500 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1501 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1502 isOpenMPParallelDirective(EKind) ||
1503 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1504 bool SimpleReduction = ReductionKind == OMPD_simd;
1505 // Emit a nowait reduction if the nowait clause is present or the directive
1506 // is a parallel directive (it always has an implicit barrier).
1507 CGM.getOpenMPRuntime().emitReduction(
1508 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1509 {WithNowait, SimpleReduction, IsPrivateVarReduction, ReductionKind});
1510 }
1511 }
1512
1513 static void emitPostUpdateForReductionClause(
1514 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1515 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1516 if (!CGF.HaveInsertPoint())
1517 return;
1518 llvm::BasicBlock *DoneBB = nullptr;
1519 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1520 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1521 if (!DoneBB) {
1522 if (llvm::Value *Cond = CondGen(CGF)) {
1523 // If the first post-update expression is found, emit conditional
1524 // block if it was requested.
1525 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1526 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1527 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1528 CGF.EmitBlock(ThenBB);
1529 }
1530 }
1531 CGF.EmitIgnoredExpr(PostUpdate);
1532 }
1533 }
1534 if (DoneBB)
1535 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1536 }
1537
1538 namespace {
1539 /// Codegen lambda for appending distribute lower and upper bounds to the
1540 /// outlined parallel function. This is necessary for combined constructs
1541 /// such as 'distribute parallel for'.
1542 typedef llvm::function_ref<void(CodeGenFunction &,
1543 const OMPExecutableDirective &,
1544 llvm::SmallVectorImpl<llvm::Value *> &)>
1545 CodeGenBoundParametersTy;
1546 } // anonymous namespace
1547
1548 static void
1549 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1550 const OMPExecutableDirective &S) {
1551 if (CGF.getLangOpts().OpenMP < 50)
1552 return;
1553 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1554 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1555 for (const Expr *Ref : C->varlist()) {
1556 if (!Ref->getType()->isScalarType())
1557 continue;
1558 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1559 if (!DRE)
1560 continue;
1561 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1562 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1563 }
1564 }
1565 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1566 for (const Expr *Ref : C->varlist()) {
1567 if (!Ref->getType()->isScalarType())
1568 continue;
1569 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1570 if (!DRE)
1571 continue;
1572 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1573 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1574 }
1575 }
1576 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1577 for (const Expr *Ref : C->varlist()) {
1578 if (!Ref->getType()->isScalarType())
1579 continue;
1580 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1581 if (!DRE)
1582 continue;
1583 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1584 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1585 }
1586 }
1587 // Privates should not be analyzed since they are not captured at all.
1588 // Task reductions may be skipped - tasks are ignored.
1589 // Firstprivates do not return a value but may be passed by reference - no
1590 // need to check for updated lastprivate conditional.
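// For orientation, a hedged sketch (not from the original source) of the
// lastprivate(conditional:) pattern this helper checks for:
// ```
// int x = 0;
// #pragma omp parallel for lastprivate(conditional: x)
// for (int i = 0; i < N; ++i)
//   if (a[i] > 0)
//     x = i; // 'x' keeps the value from the last iteration that assigns it
// ```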
1591 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1592 for (const Expr *Ref : C->varlist()) {
1593 if (!Ref->getType()->isScalarType())
1594 continue;
1595 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1596 if (!DRE)
1597 continue;
1598 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1599 }
1600 }
1601 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1602 CGF, S, PrivateDecls);
1603 }
1604
1605 static void emitCommonOMPParallelDirective(
1606 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1607 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1608 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1609 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1610 llvm::Value *NumThreads = nullptr;
1611 llvm::Function *OutlinedFn =
1612 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1613 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1614 CodeGen);
1615 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1616 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1617 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1618 /*IgnoreResultAssign=*/true);
1619 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1620 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1621 }
1622 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1623 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1624 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1625 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1626 }
1627 const Expr *IfCond = nullptr;
1628 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1629 if (C->getNameModifier() == OMPD_unknown ||
1630 C->getNameModifier() == OMPD_parallel) {
1631 IfCond = C->getCondition();
1632 break;
1633 }
1634 }
1635
1636 OMPParallelScope Scope(CGF, S);
1637 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1638 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1639 // lower and upper bounds with the pragma 'for' chunking mechanism.
1640 // The following lambda takes care of appending the lower and upper bound
1641 // parameters when necessary.
1642 CodeGenBoundParameters(CGF, S, CapturedVars);
1643 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1644 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1645 CapturedVars, IfCond, NumThreads);
1646 }
1647
1648 static bool isAllocatableDecl(const VarDecl *VD) {
1649 const VarDecl *CVD = VD->getCanonicalDecl();
1650 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1651 return false;
1652 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1653 // Use the default allocation.
1654 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1655 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1656 !AA->getAllocator());
1657 }
1658
1659 static void emitEmptyBoundParameters(CodeGenFunction &,
1660 const OMPExecutableDirective &,
1661 llvm::SmallVectorImpl<llvm::Value *> &) {}
1662
1663 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1664 const OMPExecutableDirective &S) {
1665 bool Copyins = CGF.EmitOMPCopyinClause(S);
1666 if (Copyins) {
1667 // Emit an implicit barrier to synchronize threads and avoid data races on
1668 // propagation of the master thread's values of threadprivate variables to
1669 // the local instances of those variables in all other implicit threads.
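// For example (a hedged sketch, not from the original source):
// ```
// int tp;
// #pragma omp threadprivate(tp)
// void f() {
//   tp = 42; // master thread's value
//   #pragma omp parallel copyin(tp)
//   {
//     // Without the barrier, a thread could read its own 'tp' here before
//     // the master's value has been copied into it.
//     use(tp);
//   }
// }
// ```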
1670 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1671 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1672 /*ForceSimpleCall=*/true);
1673 }
1674 }
1675
1676 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1677 CodeGenFunction &CGF, const VarDecl *VD) {
1678 CodeGenModule &CGM = CGF.CGM;
1679 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1680
1681 if (!VD)
1682 return Address::invalid();
1683 const VarDecl *CVD = VD->getCanonicalDecl();
1684 if (!isAllocatableDecl(CVD))
1685 return Address::invalid();
1686 llvm::Value *Size;
1687 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1688 if (CVD->getType()->isVariablyModifiedType()) {
1689 Size = CGF.getTypeSize(CVD->getType());
1690 // Align the size: ((size + align - 1) / align) * align
1691 Size = CGF.Builder.CreateNUWAdd(
1692 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1693 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1694 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1695 } else {
1696 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1697 Size = CGM.getSize(Sz.alignTo(Align));
1698 }
1699
1700 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1701 assert(AA->getAllocator() &&
1702 "Expected allocator expression for non-default allocator.");
1703 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1704 // According to the standard, the original allocator type is an enum (integer).
1705 // Convert it to a pointer type, if required.
1706 if (Allocator->getType()->isIntegerTy())
1707 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1708 else if (Allocator->getType()->isPointerTy())
1709 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1710 CGM.VoidPtrTy);
1711
1712 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1713 CGF.Builder, Size, Allocator,
1714 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1715 llvm::CallInst *FreeCI =
1716 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1717
1718 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1719 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1720 Addr,
1721 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1722 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1723 return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
1724 }
1725
1726 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1727 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1728 SourceLocation Loc) {
1729 CodeGenModule &CGM = CGF.CGM;
1730 if (CGM.getLangOpts().OpenMPUseTLS &&
1731 CGM.getContext().getTargetInfo().isTLSSupported())
1732 return VDAddr;
1733
1734 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1735
1736 llvm::Type *VarTy = VDAddr.getElementType();
1737 llvm::Value *Data =
1738 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
1739 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1740 std::string Suffix = getNameWithSeparators({"cache", ""});
1741 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1742
1743 llvm::CallInst *ThreadPrivateCacheCall =
1744 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1745
1746 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1747 }
1748
1749 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1750 ArrayRef<StringRef> Parts,
StringRef FirstSeparator, StringRef Separator) { 1751 SmallString<128> Buffer; 1752 llvm::raw_svector_ostream OS(Buffer); 1753 StringRef Sep = FirstSeparator; 1754 for (StringRef Part : Parts) { 1755 OS << Sep << Part; 1756 Sep = Separator; 1757 } 1758 return OS.str().str(); 1759 } 1760 1761 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 1762 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, 1763 InsertPointTy CodeGenIP, Twine RegionName) { 1764 CGBuilderTy &Builder = CGF.Builder; 1765 Builder.restoreIP(CodeGenIP); 1766 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, 1767 "." + RegionName + ".after"); 1768 1769 { 1770 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); 1771 CGF.EmitStmt(RegionBodyStmt); 1772 } 1773 1774 if (Builder.saveIP().isSet()) 1775 Builder.CreateBr(FiniBB); 1776 } 1777 1778 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( 1779 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, 1780 InsertPointTy CodeGenIP, Twine RegionName) { 1781 CGBuilderTy &Builder = CGF.Builder; 1782 Builder.restoreIP(CodeGenIP); 1783 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, 1784 "." + RegionName + ".after"); 1785 1786 { 1787 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); 1788 CGF.EmitStmt(RegionBodyStmt); 1789 } 1790 1791 if (Builder.saveIP().isSet()) 1792 Builder.CreateBr(FiniBB); 1793 } 1794 1795 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1796 if (CGM.getLangOpts().OpenMPIRBuilder) { 1797 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1798 // Check if we have any if clause associated with the directive. 1799 llvm::Value *IfCond = nullptr; 1800 if (const auto *C = S.getSingleClause<OMPIfClause>()) 1801 IfCond = EmitScalarExpr(C->getCondition(), 1802 /*IgnoreResultAssign=*/true); 1803 1804 llvm::Value *NumThreads = nullptr; 1805 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) 1806 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), 1807 /*IgnoreResultAssign=*/true); 1808 1809 ProcBindKind ProcBind = OMP_PROC_BIND_default; 1810 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) 1811 ProcBind = ProcBindClause->getProcBindKind(); 1812 1813 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 1814 1815 // The cleanup callback that finalizes all variables at the given location, 1816 // thus calls destructors etc. 1817 auto FiniCB = [this](InsertPointTy IP) { 1818 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 1819 return llvm::Error::success(); 1820 }; 1821 1822 // Privatization callback that performs appropriate action for 1823 // shared/private/firstprivate/lastprivate/copyin/... variables. 1824 // 1825 // TODO: This defaults to shared right now. 1826 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1827 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 1828 // The next line is appropriate only for variables (Val) with the 1829 // data-sharing attribute "shared". 
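// (A hedged sketch of what a non-shared variable would need instead, not
// implemented here: allocate a private copy at AllocaIP, initialize it at
// CodeGenIP - e.g. copy from Val for firstprivate - and point ReplVal at
// that private copy.)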
1830 ReplVal = &Val; 1831 1832 return CodeGenIP; 1833 }; 1834 1835 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1836 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); 1837 1838 auto BodyGenCB = [&, this](InsertPointTy AllocaIP, 1839 InsertPointTy CodeGenIP) { 1840 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( 1841 *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); 1842 return llvm::Error::success(); 1843 }; 1844 1845 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 1846 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 1847 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 1848 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 1849 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 1850 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1851 IfCond, NumThreads, ProcBind, S.hasCancel())); 1852 Builder.restoreIP(AfterIP); 1853 return; 1854 } 1855 1856 // Emit parallel region as a standalone region. 1857 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1858 Action.Enter(CGF); 1859 OMPPrivateScope PrivateScope(CGF); 1860 emitOMPCopyinClause(CGF, S); 1861 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1862 CGF.EmitOMPPrivateClause(S, PrivateScope); 1863 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1864 (void)PrivateScope.Privatize(); 1865 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); 1866 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1867 }; 1868 { 1869 auto LPCRegion = 1870 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 1871 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1872 emitEmptyBoundParameters); 1873 emitPostUpdateForReductionClause(*this, S, 1874 [](CodeGenFunction &) { return nullptr; }); 1875 } 1876 // Check for outer lastprivate conditional update. 1877 checkForLastprivateConditionalUpdate(*this, S); 1878 } 1879 1880 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { 1881 EmitStmt(S.getIfStmt()); 1882 } 1883 1884 namespace { 1885 /// RAII to handle scopes for loop transformation directives. 
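/// For example (a hedged sketch, not from the original source):
/// ```
/// #pragma omp tile sizes(8, 8)
/// for (int i = 0; i < N; ++i)
///   for (int j = 0; j < M; ++j)
///     body(i, j);
/// ```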
1886 class OMPTransformDirectiveScopeRAII {
1887 OMPLoopScope *Scope = nullptr;
1888 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1889 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1890
1891 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1892 delete;
1893 OMPTransformDirectiveScopeRAII &
1894 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1895
1896 public:
1897 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1898 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1899 Scope = new OMPLoopScope(CGF, *Dir);
1900 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1901 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1902 }
1903 }
1904 ~OMPTransformDirectiveScopeRAII() {
1905 if (!Scope)
1906 return;
1907 delete CapInfoRAII;
1908 delete CGSI;
1909 delete Scope;
1910 }
1911 };
1912 } // namespace
1913
1914 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1915 int MaxLevel, int Level = 0) {
1916 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1917 const Stmt *SimplifiedS = S->IgnoreContainers();
1918 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1919 PrettyStackTraceLoc CrashInfo(
1920 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1921 "LLVM IR generation of compound statement ('{}')");
1922
1923 // Keep track of the current cleanup stack depth, including debug scopes.
1924 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1925 for (const Stmt *CurStmt : CS->body())
1926 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1927 return;
1928 }
1929 if (SimplifiedS == NextLoop) {
1930 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1931 SimplifiedS = Dir->getTransformedStmt();
1932 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1933 SimplifiedS = CanonLoop->getLoopStmt();
1934 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1935 S = For->getBody();
1936 } else {
1937 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1938 "Expected canonical for loop or range-based for loop.");
1939 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1940 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1941 S = CXXFor->getBody();
1942 }
1943 if (Level + 1 < MaxLevel) {
1944 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1945 S, /*TryImperfectlyNestedLoops=*/true);
1946 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1947 return;
1948 }
1949 }
1950 CGF.EmitStmt(S);
1951 }
1952
1953 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1954 JumpDest LoopExit) {
1955 RunCleanupsScope BodyScope(*this);
1956 // Update counter values on the current iteration.
1957 for (const Expr *UE : D.updates())
1958 EmitIgnoredExpr(UE);
1959 // Update the linear variables.
1960 // In distribute directives only loop counters may be marked as linear; no
1961 // need to generate code for them.
1962 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
1963 if (!isOpenMPDistributeDirective(EKind)) {
1964 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1965 for (const Expr *UE : C->updates())
1966 EmitIgnoredExpr(UE);
1967 }
1968 }
1969
1970 // On a continue in the body, jump to the end.
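// For example (a hedged sketch, not from the original source):
// ```
// #pragma omp for
// for (int i = 0; i < N; ++i) {
//   if (skip(i))
//     continue; // branches to the 'omp.body.continue' block created below
//   work(i);
// }
// ```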
1971 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); 1972 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1973 for (const Expr *E : D.finals_conditions()) { 1974 if (!E) 1975 continue; 1976 // Check that loop counter in non-rectangular nest fits into the iteration 1977 // space. 1978 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); 1979 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), 1980 getProfileCount(D.getBody())); 1981 EmitBlock(NextBB); 1982 } 1983 1984 OMPPrivateScope InscanScope(*this); 1985 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); 1986 bool IsInscanRegion = InscanScope.Privatize(); 1987 if (IsInscanRegion) { 1988 // Need to remember the block before and after scan directive 1989 // to dispatch them correctly depending on the clause used in 1990 // this directive, inclusive or exclusive. For inclusive scan the natural 1991 // order of the blocks is used, for exclusive clause the blocks must be 1992 // executed in reverse order. 1993 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); 1994 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); 1995 // No need to allocate inscan exit block, in simd mode it is selected in the 1996 // codegen for the scan directive. 1997 if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd) 1998 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); 1999 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); 2000 EmitBranch(OMPScanDispatch); 2001 EmitBlock(OMPBeforeScanBlock); 2002 } 2003 2004 // Emit loop variables for C++ range loops. 2005 const Stmt *Body = 2006 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 2007 // Emit loop body. 2008 emitBody(*this, Body, 2009 OMPLoopBasedDirective::tryToFindNextInnerLoop( 2010 Body, /*TryImperfectlyNestedLoops=*/true), 2011 D.getLoopsNumber()); 2012 2013 // Jump to the dispatcher at the end of the loop body. 2014 if (IsInscanRegion) 2015 EmitBranch(OMPScanExitBlock); 2016 2017 // The end (updates/cleanups). 2018 EmitBlock(Continue.getBlock()); 2019 BreakContinueStack.pop_back(); 2020 } 2021 2022 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; 2023 2024 /// Emit a captured statement and return the function as well as its captured 2025 /// closure context. 2026 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, 2027 const CapturedStmt *S) { 2028 LValue CapStruct = ParentCGF.InitCapturedStruct(*S); 2029 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); 2030 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = 2031 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S); 2032 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); 2033 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); 2034 2035 return {F, CapStruct.getPointer(ParentCGF)}; 2036 } 2037 2038 /// Emit a call to a previously captured closure. 2039 static llvm::CallInst * 2040 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, 2041 llvm::ArrayRef<llvm::Value *> Args) { 2042 // Append the closure context to the argument. 
2043 SmallVector<llvm::Value *> EffectiveArgs; 2044 EffectiveArgs.reserve(Args.size() + 1); 2045 llvm::append_range(EffectiveArgs, Args); 2046 EffectiveArgs.push_back(Cap.second); 2047 2048 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); 2049 } 2050 2051 llvm::CanonicalLoopInfo * 2052 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { 2053 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); 2054 2055 // The caller is processing the loop-associated directive processing the \p 2056 // Depth loops nested in \p S. Put the previous pending loop-associated 2057 // directive to the stack. If the current loop-associated directive is a loop 2058 // transformation directive, it will push its generated loops onto the stack 2059 // such that together with the loops left here they form the combined loop 2060 // nest for the parent loop-associated directive. 2061 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; 2062 ExpectedOMPLoopDepth = Depth; 2063 2064 EmitStmt(S); 2065 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); 2066 2067 // The last added loop is the outermost one. 2068 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); 2069 2070 // Pop the \p Depth loops requested by the call from that stack and restore 2071 // the previous context. 2072 OMPLoopNestStack.pop_back_n(Depth); 2073 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; 2074 2075 return Result; 2076 } 2077 2078 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { 2079 const Stmt *SyntacticalLoop = S->getLoopStmt(); 2080 if (!getLangOpts().OpenMPIRBuilder) { 2081 // Ignore if OpenMPIRBuilder is not enabled. 2082 EmitStmt(SyntacticalLoop); 2083 return; 2084 } 2085 2086 LexicalScope ForScope(*this, S->getSourceRange()); 2087 2088 // Emit init statements. The Distance/LoopVar funcs may reference variable 2089 // declarations they contain. 2090 const Stmt *BodyStmt; 2091 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { 2092 if (const Stmt *InitStmt = For->getInit()) 2093 EmitStmt(InitStmt); 2094 BodyStmt = For->getBody(); 2095 } else if (const auto *RangeFor = 2096 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { 2097 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) 2098 EmitStmt(RangeStmt); 2099 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) 2100 EmitStmt(BeginStmt); 2101 if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) 2102 EmitStmt(EndStmt); 2103 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) 2104 EmitStmt(LoopVarStmt); 2105 BodyStmt = RangeFor->getBody(); 2106 } else 2107 llvm_unreachable("Expected for-stmt or range-based for-stmt"); 2108 2109 // Emit closure for later use. By-value captures will be captured here. 2110 const CapturedStmt *DistanceFunc = S->getDistanceFunc(); 2111 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); 2112 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); 2113 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); 2114 2115 // Call the distance function to get the number of iterations of the loop to 2116 // come. 2117 QualType LogicalTy = DistanceFunc->getCapturedDecl() 2118 ->getParam(0) 2119 ->getType() 2120 .getNonReferenceType(); 2121 RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); 2122 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); 2123 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); 2124 2125 // Emit the loop structure. 
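// As a worked illustration (assumed semantics, not from the original
// source): for a loop 'for (int i = a; i < b; i += c)' with c > 0, the
// distance closure above stores the trip count, conceptually
// (b - a + c - 1) / c, through its out-parameter, and the loop-variable
// closure below rewrites a logical iteration number k into 'i = a + k * c'
// before each execution of the body.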
2126 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2127 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, 2128 llvm::Value *IndVar) { 2129 Builder.restoreIP(CodeGenIP); 2130 2131 // Emit the loop body: Convert the logical iteration number to the loop 2132 // variable and emit the body. 2133 const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); 2134 LValue LCVal = EmitLValue(LoopVarRef); 2135 Address LoopVarAddress = LCVal.getAddress(); 2136 emitCapturedStmtCall(*this, LoopVarClosure, 2137 {LoopVarAddress.emitRawPointer(*this), IndVar}); 2138 2139 RunCleanupsScope BodyScope(*this); 2140 EmitStmt(BodyStmt); 2141 return llvm::Error::success(); 2142 }; 2143 2144 llvm::CanonicalLoopInfo *CL = 2145 cantFail(OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal)); 2146 2147 // Finish up the loop. 2148 Builder.restoreIP(CL->getAfterIP()); 2149 ForScope.ForceCleanup(); 2150 2151 // Remember the CanonicalLoopInfo for parent AST nodes consuming it. 2152 OMPLoopNestStack.push_back(CL); 2153 } 2154 2155 void CodeGenFunction::EmitOMPInnerLoop( 2156 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, 2157 const Expr *IncExpr, 2158 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 2159 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 2160 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 2161 2162 // Start the loop with a block that tests the condition. 2163 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 2164 EmitBlock(CondBlock); 2165 const SourceRange R = S.getSourceRange(); 2166 2167 // If attributes are attached, push to the basic block with them. 2168 const auto &OMPED = cast<OMPExecutableDirective>(S); 2169 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 2170 const Stmt *SS = ICS->getCapturedStmt(); 2171 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 2172 OMPLoopNestStack.clear(); 2173 if (AS) 2174 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 2175 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 2176 SourceLocToDebugLoc(R.getEnd())); 2177 else 2178 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2179 SourceLocToDebugLoc(R.getEnd())); 2180 2181 // If there are any cleanups between here and the loop-exit scope, 2182 // create a block to stage a loop exit along. 2183 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2184 if (RequiresCleanup) 2185 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 2186 2187 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 2188 2189 // Emit condition. 2190 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 2191 if (ExitBlock != LoopExit.getBlock()) { 2192 EmitBlock(ExitBlock); 2193 EmitBranchThroughCleanup(LoopExit); 2194 } 2195 2196 EmitBlock(LoopBody); 2197 incrementProfileCounter(&S); 2198 2199 // Create a block for the increment. 2200 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 2201 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2202 2203 BodyGen(*this); 2204 2205 // Emit "IV = IV + 1" and a back-edge to the condition block. 2206 EmitBlock(Continue.getBlock()); 2207 EmitIgnoredExpr(IncExpr); 2208 PostIncGen(*this); 2209 BreakContinueStack.pop_back(); 2210 EmitBranch(CondBlock); 2211 LoopStack.pop(); 2212 // Emit the fall-through block. 
2213 EmitBlock(LoopExit.getBlock()); 2214 } 2215 2216 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 2217 if (!HaveInsertPoint()) 2218 return false; 2219 // Emit inits for the linear variables. 2220 bool HasLinears = false; 2221 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2222 for (const Expr *Init : C->inits()) { 2223 HasLinears = true; 2224 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 2225 if (const auto *Ref = 2226 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 2227 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 2228 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 2229 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2230 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2231 VD->getInit()->getType(), VK_LValue, 2232 VD->getInit()->getExprLoc()); 2233 EmitExprAsInit( 2234 &DRE, VD, 2235 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), 2236 /*capturedByInit=*/false); 2237 EmitAutoVarCleanups(Emission); 2238 } else { 2239 EmitVarDecl(*VD); 2240 } 2241 } 2242 // Emit the linear steps for the linear clauses. 2243 // If a step is not constant, it is pre-calculated before the loop. 2244 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 2245 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 2246 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 2247 // Emit calculation of the linear step. 2248 EmitIgnoredExpr(CS); 2249 } 2250 } 2251 return HasLinears; 2252 } 2253 2254 void CodeGenFunction::EmitOMPLinearClauseFinal( 2255 const OMPLoopDirective &D, 2256 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2257 if (!HaveInsertPoint()) 2258 return; 2259 llvm::BasicBlock *DoneBB = nullptr; 2260 // Emit the final values of the linear variables. 2261 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2262 auto IC = C->varlist_begin(); 2263 for (const Expr *F : C->finals()) { 2264 if (!DoneBB) { 2265 if (llvm::Value *Cond = CondGen(*this)) { 2266 // If the first post-update expression is found, emit conditional 2267 // block if it was requested. 
2268 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 2269 DoneBB = createBasicBlock(".omp.linear.pu.done"); 2270 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2271 EmitBlock(ThenBB); 2272 } 2273 } 2274 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 2275 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2276 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2277 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 2278 Address OrigAddr = EmitLValue(&DRE).getAddress(); 2279 CodeGenFunction::OMPPrivateScope VarScope(*this); 2280 VarScope.addPrivate(OrigVD, OrigAddr); 2281 (void)VarScope.Privatize(); 2282 EmitIgnoredExpr(F); 2283 ++IC; 2284 } 2285 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 2286 EmitIgnoredExpr(PostUpdate); 2287 } 2288 if (DoneBB) 2289 EmitBlock(DoneBB, /*IsFinished=*/true); 2290 } 2291 2292 static void emitAlignedClause(CodeGenFunction &CGF, 2293 const OMPExecutableDirective &D) { 2294 if (!CGF.HaveInsertPoint()) 2295 return; 2296 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 2297 llvm::APInt ClauseAlignment(64, 0); 2298 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 2299 auto *AlignmentCI = 2300 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 2301 ClauseAlignment = AlignmentCI->getValue(); 2302 } 2303 for (const Expr *E : Clause->varlist()) { 2304 llvm::APInt Alignment(ClauseAlignment); 2305 if (Alignment == 0) { 2306 // OpenMP [2.8.1, Description] 2307 // If no optional parameter is specified, implementation-defined default 2308 // alignments for SIMD instructions on the target platforms are assumed. 2309 Alignment = 2310 CGF.getContext() 2311 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 2312 E->getType()->getPointeeType())) 2313 .getQuantity(); 2314 } 2315 assert((Alignment == 0 || Alignment.isPowerOf2()) && 2316 "alignment is not power of 2"); 2317 if (Alignment != 0) { 2318 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 2319 CGF.emitAlignmentAssumption( 2320 PtrValue, E, /*No second loc needed*/ SourceLocation(), 2321 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 2322 } 2323 } 2324 } 2325 } 2326 2327 void CodeGenFunction::EmitOMPPrivateLoopCounters( 2328 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 2329 if (!HaveInsertPoint()) 2330 return; 2331 auto I = S.private_counters().begin(); 2332 for (const Expr *E : S.counters()) { 2333 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2334 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2335 // Emit var without initialization. 2336 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 2337 EmitAutoVarCleanups(VarEmission); 2338 LocalDeclMap.erase(PrivateVD); 2339 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress()); 2340 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 2341 VD->hasGlobalStorage()) { 2342 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 2343 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 2344 E->getType(), VK_LValue, E->getExprLoc()); 2345 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress()); 2346 } else { 2347 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress()); 2348 } 2349 ++I; 2350 } 2351 // Privatize extra loop counters used in loops for ordered(n) clauses. 
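// For instance (a hedged sketch, not from the original source), given
// ```
// #pragma omp for ordered(2)
// for (int i = 0; i < N; ++i)
//   for (int j = 0; j < M; ++j) {
//     #pragma omp ordered depend(sink: i - 1, j)
//     work(i, j);
//     #pragma omp ordered depend(source)
//   }
// ```
// only 'i' is a counter of an associated loop; 'j' is one of the extra
// counters privatized below.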
2352 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 2353 if (!C->getNumForLoops()) 2354 continue; 2355 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); 2356 I < E; ++I) { 2357 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 2358 const auto *VD = cast<VarDecl>(DRE->getDecl()); 2359 // Override only those variables that can be captured to avoid re-emission 2360 // of the variables declared within the loops. 2361 if (DRE->refersToEnclosingVariableOrCapture()) { 2362 (void)LoopScope.addPrivate( 2363 VD, CreateMemTemp(DRE->getType(), VD->getName())); 2364 } 2365 } 2366 } 2367 } 2368 2369 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 2370 const Expr *Cond, llvm::BasicBlock *TrueBlock, 2371 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 2372 if (!CGF.HaveInsertPoint()) 2373 return; 2374 { 2375 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 2376 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 2377 (void)PreCondScope.Privatize(); 2378 // Get initial values of real counters. 2379 for (const Expr *I : S.inits()) { 2380 CGF.EmitIgnoredExpr(I); 2381 } 2382 } 2383 // Create temp loop control variables with their init values to support 2384 // non-rectangular loops. 2385 CodeGenFunction::OMPMapVars PreCondVars; 2386 for (const Expr *E : S.dependent_counters()) { 2387 if (!E) 2388 continue; 2389 assert(!E->getType().getNonReferenceType()->isRecordType() && 2390 "dependent counter must not be an iterator."); 2391 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2392 Address CounterAddr = 2393 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 2394 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 2395 } 2396 (void)PreCondVars.apply(CGF); 2397 for (const Expr *E : S.dependent_inits()) { 2398 if (!E) 2399 continue; 2400 CGF.EmitIgnoredExpr(E); 2401 } 2402 // Check that loop is executed at least one time. 2403 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 2404 PreCondVars.restore(CGF); 2405 } 2406 2407 void CodeGenFunction::EmitOMPLinearClause( 2408 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 2409 if (!HaveInsertPoint()) 2410 return; 2411 llvm::DenseSet<const VarDecl *> SIMDLCVs; 2412 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D); 2413 if (isOpenMPSimdDirective(EKind)) { 2414 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 2415 for (const Expr *C : LoopDirective->counters()) { 2416 SIMDLCVs.insert( 2417 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 2418 } 2419 } 2420 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2421 auto CurPrivate = C->privates().begin(); 2422 for (const Expr *E : C->varlist()) { 2423 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2424 const auto *PrivateVD = 2425 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 2426 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 2427 // Emit private VarDecl with copy init. 2428 EmitVarDecl(*PrivateVD); 2429 bool IsRegistered = 2430 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD)); 2431 assert(IsRegistered && "linear var already registered as private"); 2432 // Silence the warning about unused variable. 
2433 (void)IsRegistered; 2434 } else { 2435 EmitVarDecl(*PrivateVD); 2436 } 2437 ++CurPrivate; 2438 } 2439 } 2440 } 2441 2442 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 2443 const OMPExecutableDirective &D) { 2444 if (!CGF.HaveInsertPoint()) 2445 return; 2446 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 2447 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2448 /*ignoreResult=*/true); 2449 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2450 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2451 // In presence of finite 'safelen', it may be unsafe to mark all 2452 // the memory instructions parallel, because loop-carried 2453 // dependences of 'safelen' iterations are possible. 2454 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 2455 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 2456 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2457 /*ignoreResult=*/true); 2458 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2459 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2460 // In presence of finite 'safelen', it may be unsafe to mark all 2461 // the memory instructions parallel, because loop-carried 2462 // dependences of 'safelen' iterations are possible. 2463 CGF.LoopStack.setParallel(/*Enable=*/false); 2464 } 2465 } 2466 2467 // Check for the presence of an `OMPOrderedDirective`, 2468 // i.e., `ordered` in `#pragma omp ordered simd`. 2469 // 2470 // Consider the following source code: 2471 // ``` 2472 // __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE]) 2473 // { 2474 // for (int r = 1; r < ARRAY_SIZE; ++r) { 2475 // for (int c = 1; c < ARRAY_SIZE; ++c) { 2476 // #pragma omp simd 2477 // for (int k = 2; k < ARRAY_SIZE; ++k) { 2478 // #pragma omp ordered simd 2479 // X[r][k] = X[r][k - 2] + sinf((float)(r / c)); 2480 // } 2481 // } 2482 // } 2483 // } 2484 // ``` 2485 // 2486 // Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective 2487 // &D)`. By examining `D.dump()` we have the following AST containing 2488 // `OMPOrderedDirective`: 2489 // 2490 // ``` 2491 // OMPSimdDirective 0x1c32950 2492 // `-CapturedStmt 0x1c32028 2493 // |-CapturedDecl 0x1c310e8 2494 // | |-ForStmt 0x1c31e30 2495 // | | |-DeclStmt 0x1c31298 2496 // | | | `-VarDecl 0x1c31208 used k 'int' cinit 2497 // | | | `-IntegerLiteral 0x1c31278 'int' 2 2498 // | | |-<<<NULL>>> 2499 // | | |-BinaryOperator 0x1c31308 'int' '<' 2500 // | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue> 2501 // | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int' 2502 // | | | `-IntegerLiteral 0x1c312d0 'int' 256 2503 // | | |-UnaryOperator 0x1c31348 'int' prefix '++' 2504 // | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int' 2505 // | | `-CompoundStmt 0x1c31e18 2506 // | | `-OMPOrderedDirective 0x1c31dd8 2507 // | | |-OMPSimdClause 0x1c31380 2508 // | | `-CapturedStmt 0x1c31cd0 2509 // ``` 2510 // 2511 // Note the presence of `OMPOrderedDirective` above: 2512 // It's (transitively) nested in a `CapturedStmt` representing the pragma 2513 // annotated compound statement. Thus, we need to consider this nesting and 2514 // include checking the `getCapturedStmt` in this case. 
2515 static bool hasOrderedDirective(const Stmt *S) {
2516 if (isa<OMPOrderedDirective>(S))
2517 return true;
2518
2519 if (const auto *CS = dyn_cast<CapturedStmt>(S))
2520 return hasOrderedDirective(CS->getCapturedStmt());
2521
2522 for (const Stmt *Child : S->children()) {
2523 if (Child && hasOrderedDirective(Child))
2524 return true;
2525 }
2526
2527 return false;
2528 }
2529
2530 static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2531 LoopInfoStack &LoopStack) {
2532 // Check for the presence of an `OMPOrderedDirective`,
2533 // i.e., `ordered` in `#pragma omp ordered simd`.
2534 bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
2535 // If present, conservatively disable loop vectorization,
2536 // analogously to how `emitSimdlenSafelenClause` does.
2537 if (HasOrderedDirective)
2538 LoopStack.setParallel(/*Enable=*/false);
2539 }
2540
2541 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2542 // Walk the clauses and process simdlen/safelen/order and related clauses.
2543 LoopStack.setParallel(/*Enable=*/true);
2544 LoopStack.setVectorizeEnable();
2545 const Stmt *AssociatedStmt = D.getAssociatedStmt();
2546 applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
2547 emitSimdlenSafelenClause(*this, D);
2548 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2549 if (C->getKind() == OMPC_ORDER_concurrent)
2550 LoopStack.setParallel(/*Enable=*/true);
2551 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
2552 if ((EKind == OMPD_simd ||
2553 (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(EKind))) &&
2554 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2555 [](const OMPReductionClause *C) {
2556 return C->getModifier() == OMPC_REDUCTION_inscan;
2557 }))
2558 // Disable parallel access in case of prefix sum.
2559 LoopStack.setParallel(/*Enable=*/false);
2560 }
2561
2562 void CodeGenFunction::EmitOMPSimdFinal(
2563 const OMPLoopDirective &D,
2564 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2565 if (!HaveInsertPoint())
2566 return;
2567 llvm::BasicBlock *DoneBB = nullptr;
2568 auto IC = D.counters().begin();
2569 auto IPC = D.private_counters().begin();
2570 for (const Expr *F : D.finals()) {
2571 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2572 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2573 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2574 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2575 OrigVD->hasGlobalStorage() || CED) {
2576 if (!DoneBB) {
2577 if (llvm::Value *Cond = CondGen(*this)) {
2578 // If the first post-update expression is found, emit conditional
2579 // block if it was requested.
2580 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 2581 DoneBB = createBasicBlock(".omp.final.done"); 2582 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2583 EmitBlock(ThenBB); 2584 } 2585 } 2586 Address OrigAddr = Address::invalid(); 2587 if (CED) { 2588 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 2589 } else { 2590 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 2591 /*RefersToEnclosingVariableOrCapture=*/false, 2592 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 2593 OrigAddr = EmitLValue(&DRE).getAddress(); 2594 } 2595 OMPPrivateScope VarScope(*this); 2596 VarScope.addPrivate(OrigVD, OrigAddr); 2597 (void)VarScope.Privatize(); 2598 EmitIgnoredExpr(F); 2599 } 2600 ++IC; 2601 ++IPC; 2602 } 2603 if (DoneBB) 2604 EmitBlock(DoneBB, /*IsFinished=*/true); 2605 } 2606 2607 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 2608 const OMPLoopDirective &S, 2609 CodeGenFunction::JumpDest LoopExit) { 2610 CGF.EmitOMPLoopBody(S, LoopExit); 2611 CGF.EmitStopPoint(&S); 2612 } 2613 2614 /// Emit a helper variable and return corresponding lvalue. 2615 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2616 const DeclRefExpr *Helper) { 2617 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2618 CGF.EmitVarDecl(*VDecl); 2619 return CGF.EmitLValue(Helper); 2620 } 2621 2622 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2623 const RegionCodeGenTy &SimdInitGen, 2624 const RegionCodeGenTy &BodyCodeGen) { 2625 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2626 PrePostActionTy &) { 2627 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2628 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2629 SimdInitGen(CGF); 2630 2631 BodyCodeGen(CGF); 2632 }; 2633 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2634 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2635 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2636 2637 BodyCodeGen(CGF); 2638 }; 2639 const Expr *IfCond = nullptr; 2640 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 2641 if (isOpenMPSimdDirective(EKind)) { 2642 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2643 if (CGF.getLangOpts().OpenMP >= 50 && 2644 (C->getNameModifier() == OMPD_unknown || 2645 C->getNameModifier() == OMPD_simd)) { 2646 IfCond = C->getCondition(); 2647 break; 2648 } 2649 } 2650 } 2651 if (IfCond) { 2652 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2653 } else { 2654 RegionCodeGenTy ThenRCG(ThenGen); 2655 ThenRCG(CGF); 2656 } 2657 } 2658 2659 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2660 PrePostActionTy &Action) { 2661 Action.Enter(CGF); 2662 OMPLoopScope PreInitScope(CGF, S); 2663 // if (PreCond) { 2664 // for (IV in 0..LastIteration) BODY; 2665 // <Final counter/linear vars updates>; 2666 // } 2667 2668 // The presence of lower/upper bound variable depends on the actual directive 2669 // kind in the AST node. The variables must be emitted because some of the 2670 // expressions associated with the loop will use them. 
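// E.g. (a hedged illustration, not from the original source), for
// '#pragma omp for simd' the worksharing machinery provides lower/upper
// bound helper variables that the loop expressions reference, whereas a
// plain '#pragma omp simd' carries no such bounds.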
2671 OpenMPDirectiveKind DKind = S.getDirectiveKind(); 2672 if (isOpenMPDistributeDirective(DKind) || 2673 isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || 2674 isOpenMPGenericLoopDirective(DKind)) { 2675 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2676 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2677 } 2678 2679 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 2680 // Emit: if (PreCond) - begin. 2681 // If the condition constant folds and can be elided, avoid emitting the 2682 // whole loop. 2683 bool CondConstant; 2684 llvm::BasicBlock *ContBlock = nullptr; 2685 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2686 if (!CondConstant) 2687 return; 2688 } else { 2689 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2690 ContBlock = CGF.createBasicBlock("simd.if.end"); 2691 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2692 CGF.getProfileCount(&S)); 2693 CGF.EmitBlock(ThenBlock); 2694 CGF.incrementProfileCounter(&S); 2695 } 2696 2697 // Emit the loop iteration variable. 2698 const Expr *IVExpr = S.getIterationVariable(); 2699 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2700 CGF.EmitVarDecl(*IVDecl); 2701 CGF.EmitIgnoredExpr(S.getInit()); 2702 2703 // Emit the iterations count variable. 2704 // If it is not a variable, Sema decided to calculate iterations count on 2705 // each iteration (e.g., it is foldable into a constant). 2706 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2707 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2708 // Emit calculation of the iterations count. 2709 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 2710 } 2711 2712 emitAlignedClause(CGF, S); 2713 (void)CGF.EmitOMPLinearClauseInit(S); 2714 { 2715 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2716 CGF.EmitOMPPrivateClause(S, LoopScope); 2717 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 2718 CGF.EmitOMPLinearClause(S, LoopScope); 2719 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2720 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2721 CGF, S, CGF.EmitLValue(S.getIterationVariable())); 2722 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2723 (void)LoopScope.Privatize(); 2724 if (isOpenMPTargetExecutionDirective(EKind)) 2725 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2726 2727 emitCommonSimdLoop( 2728 CGF, S, 2729 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2730 CGF.EmitOMPSimdInit(S); 2731 }, 2732 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2733 CGF.EmitOMPInnerLoop( 2734 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2735 [&S](CodeGenFunction &CGF) { 2736 emitOMPLoopBodyWithStopPoint(CGF, S, 2737 CodeGenFunction::JumpDest()); 2738 }, 2739 [](CodeGenFunction &) {}); 2740 }); 2741 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2742 // Emit final copy of the lastprivate variables at the end of loops. 2743 if (HasLastprivateClause) 2744 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 2745 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 2746 emitPostUpdateForReductionClause(CGF, S, 2747 [](CodeGenFunction &) { return nullptr; }); 2748 LoopScope.restoreMap(); 2749 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 2750 } 2751 // Emit: if (PreCond) - end. 
2752 if (ContBlock) {
2753 CGF.EmitBranch(ContBlock);
2754 CGF.EmitBlock(ContBlock, true);
2755 }
2756 }
2757
2758 // Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2759 // available for "loop bind(thread)", which maps to "simd".
2760 static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
2761 // Check for unsupported clauses.
2762 for (OMPClause *C : S.clauses()) {
2763 // Currently only the order, simdlen, safelen and aligned clauses are supported.
2764 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2765 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2766 return false;
2767 }
2768
2769 // Check if we have a statement with the ordered directive.
2770 // Visit the statement hierarchy to find a compound statement
2771 // with an ordered directive in it.
2772 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2773 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2774 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2775 if (!SubStmt)
2776 continue;
2777 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2778 for (const Stmt *CSSubStmt : CS->children()) {
2779 if (!CSSubStmt)
2780 continue;
2781 if (isa<OMPOrderedDirective>(CSSubStmt)) {
2782 return false;
2783 }
2784 }
2785 }
2786 }
2787 }
2788 }
2789 return true;
2790 }
2791
2792 static llvm::MapVector<llvm::Value *, llvm::Value *>
2793 GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
2794 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2795 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2796 llvm::APInt ClauseAlignment(64, 0);
2797 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2798 auto *AlignmentCI =
2799 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2800 ClauseAlignment = AlignmentCI->getValue();
2801 }
2802 for (const Expr *E : Clause->varlist()) {
2803 llvm::APInt Alignment(ClauseAlignment);
2804 if (Alignment == 0) {
2805 // OpenMP [2.8.1, Description]
2806 // If no optional parameter is specified, implementation-defined default
2807 // alignments for SIMD instructions on the target platforms are assumed.
2808 Alignment =
2809 CGF.getContext()
2810 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2811 E->getType()->getPointeeType()))
2812 .getQuantity();
2813 }
2814 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2815 "alignment is not power of 2");
2816 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2817 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2818 }
2819 }
2820 return AlignedVars;
2821 }
2822
2823 // Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2824 // available for "loop bind(thread)", which maps to "simd".
2825 static void emitOMPSimdDirective(const OMPLoopDirective &S,
2826 CodeGenFunction &CGF, CodeGenModule &CGM) {
2827 bool UseOMPIRBuilder =
2828 CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
2829 if (UseOMPIRBuilder) {
2830 auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
2831 PrePostActionTy &) {
2832 // Use the OpenMPIRBuilder if enabled.
2833 if (UseOMPIRBuilder) {
2834 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2835 GetAlignedMapping(S, CGF);
2836 // Emit the associated statement and get its loop representation.
2837 const Stmt *Inner = S.getRawStmt(); 2838 llvm::CanonicalLoopInfo *CLI = 2839 CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2840 2841 llvm::OpenMPIRBuilder &OMPBuilder = 2842 CGM.getOpenMPRuntime().getOMPBuilder(); 2843 // Add SIMD specific metadata 2844 llvm::ConstantInt *Simdlen = nullptr; 2845 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { 2846 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2847 /*ignoreResult=*/true); 2848 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2849 Simdlen = Val; 2850 } 2851 llvm::ConstantInt *Safelen = nullptr; 2852 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { 2853 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2854 /*ignoreResult=*/true); 2855 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2856 Safelen = Val; 2857 } 2858 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; 2859 if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 2860 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) { 2861 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; 2862 } 2863 } 2864 // Add simd metadata to the collapsed loop. Do not generate 2865 // another loop for if clause. Support for if clause is done earlier. 2866 OMPBuilder.applySimd(CLI, AlignedVars, 2867 /*IfCond*/ nullptr, Order, Simdlen, Safelen); 2868 return; 2869 } 2870 }; 2871 { 2872 auto LPCRegion = 2873 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); 2874 OMPLexicalScope Scope(CGF, S, OMPD_unknown); 2875 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 2876 CodeGenIRBuilder); 2877 } 2878 return; 2879 } 2880 2881 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); 2882 CGF.OMPFirstScanLoop = true; 2883 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2884 emitOMPSimdRegion(CGF, S, Action); 2885 }; 2886 { 2887 auto LPCRegion = 2888 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); 2889 OMPLexicalScope Scope(CGF, S, OMPD_unknown); 2890 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, CodeGen); 2891 } 2892 // Check for outer lastprivate conditional update. 2893 checkForLastprivateConditionalUpdate(CGF, S); 2894 } 2895 2896 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2897 emitOMPSimdDirective(S, *this, CGM); 2898 } 2899 2900 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { 2901 // Emit the de-sugared statement. 2902 OMPTransformDirectiveScopeRAII TileScope(*this, &S); 2903 EmitStmt(S.getTransformedStmt()); 2904 } 2905 2906 void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) { 2907 // Emit the de-sugared statement. 2908 OMPTransformDirectiveScopeRAII StripeScope(*this, &S); 2909 EmitStmt(S.getTransformedStmt()); 2910 } 2911 2912 void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { 2913 // Emit the de-sugared statement. 2914 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); 2915 EmitStmt(S.getTransformedStmt()); 2916 } 2917 2918 void CodeGenFunction::EmitOMPInterchangeDirective( 2919 const OMPInterchangeDirective &S) { 2920 // Emit the de-sugared statement. 

// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
// available for "loop bind(thread)", which maps to "simd".
static void emitOMPSimdDirective(const OMPLoopDirective &S,
                                 CodeGenFunction &CGF, CodeGenModule &CGM) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
            GetAlignedMapping(S, CGF);
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                       /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Simdlen = Val;
        }
        llvm::ConstantInt *Safelen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
          RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                       /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Safelen = Val;
        }
        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
          if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
          }
        }
        // Add simd metadata to the collapsed loop. Do not generate
        // another loop for if clause. Support for if clause is done earlier.
        OMPBuilder.applySimd(CLI, AlignedVars,
                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
      OMPLexicalScope Scope(CGF, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                  CodeGenIRBuilder);
    }
    return;
  }

  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  CGF.OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  emitOMPSimdDirective(S, *this, CGM);
}
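
// For illustration (not in the original source): when LangOpts.OpenMPIRBuilder
// is set, a plain
//
//   #pragma omp simd
//   for (int i = 0; i < n; ++i) ...
//
// goes through the CodeGenIRBuilder lambda above (canonical loop nest plus
// applySimd metadata); otherwise the same directive is lowered via
// emitOMPSimdRegion inside an inlined OMPD_simd region.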

void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPInterchangeDirective(
    const OMPInterchangeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}
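
// For illustration (not in the original source): these loop-transformation
// directives are de-sugared earlier by Sema, so
//
//   #pragma omp tile sizes(4)
//   for (int i = 0; i < n; ++i) ...
//
// arrives here with getTransformedStmt() already holding the equivalent
// floor/tile loop nest, and codegen simply emits that statement.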

void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}
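
// For illustration (not in the original source):
//
//   #pragma omp unroll partial(4)
//   for (int i = 0; i < n; ++i) ...
//
// either calls OMPBuilder.unrollLoopPartial with Factor == 4 (OMPIRBuilder
// path) or sets unroll-count loop metadata on the next emitted loop (classic
// path); 'full' unrolls the loop completely, and a bare '#pragma omp unroll'
// leaves the unroll decision to LLVM's heuristics.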
3154 // } // inner loop 3155 // } 3156 // __kmpc_dispatch_deinit(); 3157 // 3158 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3159 // When schedule(static, chunk_size) is specified, iterations are divided into 3160 // chunks of size chunk_size, and the chunks are assigned to the threads in 3161 // the team in a round-robin fashion in the order of the thread number. 3162 // 3163 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 3164 // while (idx <= UB) { BODY; ++idx; } // inner loop 3165 // LB = LB + ST; 3166 // UB = UB + ST; 3167 // } 3168 // 3169 3170 const Expr *IVExpr = S.getIterationVariable(); 3171 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3172 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3173 3174 if (DynamicOrOrdered) { 3175 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = 3176 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 3177 llvm::Value *LBVal = DispatchBounds.first; 3178 llvm::Value *UBVal = DispatchBounds.second; 3179 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 3180 LoopArgs.Chunk}; 3181 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, 3182 IVSigned, Ordered, DipatchRTInputValues); 3183 } else { 3184 CGOpenMPRuntime::StaticRTInput StaticInit( 3185 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 3186 LoopArgs.ST, LoopArgs.Chunk); 3187 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3188 RT.emitForStaticInit(*this, S.getBeginLoc(), EKind, ScheduleKind, 3189 StaticInit); 3190 } 3191 3192 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 3193 const unsigned IVSize, 3194 const bool IVSigned) { 3195 if (Ordered) { 3196 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 3197 IVSigned); 3198 } 3199 }; 3200 3201 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 3202 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 3203 OuterLoopArgs.IncExpr = S.getInc(); 3204 OuterLoopArgs.Init = S.getInit(); 3205 OuterLoopArgs.Cond = S.getCond(); 3206 OuterLoopArgs.NextLB = S.getNextLowerBound(); 3207 OuterLoopArgs.NextUB = S.getNextUpperBound(); 3208 OuterLoopArgs.DKind = LoopArgs.DKind; 3209 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 3210 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 3211 if (DynamicOrOrdered) { 3212 RT.emitForDispatchDeinit(*this, S.getBeginLoc()); 3213 } 3214 } 3215 3216 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 3217 const unsigned IVSize, const bool IVSigned) {} 3218 3219 void CodeGenFunction::EmitOMPDistributeOuterLoop( 3220 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 3221 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 3222 const CodeGenLoopTy &CodeGenLoopContent) { 3223 3224 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3225 3226 // Emit outer loop. 

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // __kmpc_dispatch_init();
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  // __kmpc_dispatch_deinit();
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    RT.emitForStaticInit(*this, S.getBeginLoc(), EKind, ScheduleKind,
                         StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  OuterLoopArgs.DKind = LoopArgs.DKind;
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
  if (DynamicOrOrdered) {
    RT.emitForDispatchDeinit(*this, S.getBeginLoc());
  }
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
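
// For illustration (not in the original source): '#pragma omp for
// schedule(dynamic, 4)' reaches this function with DynamicOrOrdered == true
// and lowers to the __kmpc_dispatch_init/_next loop sketched above, while
// 'schedule(static, 4)' (chunked static) uses emitForStaticInit plus the
// strided outer loop. A non-chunked static schedule only gets here when an
// 'ordered' clause forces the dispatch path; otherwise it needs no outer loop
// at all, which is what the assertion above enforces.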

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' directives, the increment expression
  // of 'distribute' is stored in DistInc. For 'distribute' alone, it is in
  // Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(EKind))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(EKind)
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(EKind)
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(EKind)
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(EKind)
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(EKind)
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  OuterLoopArgs.DKind = OMPD_distribute;

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}
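
// For illustration (not in the original source): in
//
//   #pragma omp distribute parallel for
//   for (int i = 0; i < n; ++i) ...
//
// each team's 'distribute' chunk [PrevLB, PrevUB] is passed as parameters to
// the outlined 'parallel' function, and the helpers above re-seed the inner
// worksharing loop's LB/UB (or its dispatch bounds) from that chunk instead of
// from the full iteration space [0, LastIteration].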

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
                                                PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(EKind)) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, isOpenMPSimdDirective(EKind) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
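
// For illustration (not in the original source): for
//
//   #pragma omp target simd
//   for (int i = 0; i < n; ++i) ...
//
// the device-side entry point is produced by EmitOMPTargetSimdDeviceFunction
// (a standalone outlined region registered as an offload entry), while host
// compilation goes through emitCommonOMPTargetDirective with the same
// emitOMPSimdRegion payload.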

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(EKind))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(EKind);
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(EKind)) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit, EKind,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is
              // divided into chunks that are approximately equal in size,
              // and at most one chunk is distributed to each thread. Note
              // that the size of the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         OMPD_for);
        };
        OMPCancelStack.emitExit(*this, EKind, CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(EKind)) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(EKind)
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(EKind),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
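
// For illustration (not in the original source): EmitOMPWorksharingLoop picks
// one of two shapes. '#pragma omp for schedule(static)' (unchunked) emits a
// single for_static_init / inner-loop / for_static_fini sequence inline,
// while a dispatch schedule such as '#pragma omp for schedule(guided, 2)' is
// routed to EmitOMPForOuterLoop, which wraps the inner loop in a
// chunk-request loop. The returned flag reports whether a 'lastprivate'
// clause was seen, so callers know an end-of-construct barrier may still be
// required.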

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
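
// For illustration (not in the original source): for a standalone
// '#pragma omp for schedule(dynamic)', emitDispatchForLoopBounds yields the
// constant pair (0, LastIteration) because the loop is normalized, whereas
// the combined 'distribute parallel for' variant defined earlier must load
// the bounds of the team's current distribute chunk instead.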

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variable.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}

/// Copies final inscan reduction values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    Privates.append(C->privates().begin(), C->privates().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      OMPScanNumIterations,
      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
        RValue::get(OMPLast));
    LValue DestLVal = CGF.EmitLValue(OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
    CGF.EmitOMPCopy(
        PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
        cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
  }
}
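
// For illustration (not in the original source): given
//
//   int red = 0;
//   #pragma omp for reduction(inscan, + : red)
//   for (int i = 0; i < n; ++i) {
//     red += a[i];
//     #pragma omp scan inclusive(red)
//     b[i] = red;
//   }
//
// emitScanBasedDirectiveDecls materializes the temporary 'buffer[n]' (a VLA
// sized by the iteration count), and emitScanBasedDirectiveFinals stores
// buffer[n-1] back into 'red' once the scan is complete.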

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k <= ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true,
           /*IsPrivateVarReduction*/ {}, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPParallelDirective(EKind)) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
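
// For illustration (not in the original source): the two-pass scheme above
// runs the loop body once to fill buffer[] (FirstGen), performs a
// ceil(log2(n))-step parallel prefix reduction over buffer[] under
// '#pragma omp master' followed by a barrier, then re-runs the loop
// (SecondGen) with OMPFirstScanLoop == false so each iteration reads its
// prefix value out of buffer[].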

static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    const auto &&SecondGen = [&S, HasCancel, EKind,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    if (!isOpenMPParallelDirective(EKind))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(EKind))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}

// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
// available for "loop bind(parallel)", which maps to "for".
static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
                                            bool HasCancel) {
  if (HasCancel)
    return false;
  for (OMPClause *C : S.clauses()) {
    if (isa<OMPNowaitClause, OMPBindClause>(C))
      continue;

    if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
      if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
        return false;
      if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
        return false;
      switch (SC->getScheduleKind()) {
      case OMPC_SCHEDULE_auto:
      case OMPC_SCHEDULE_dynamic:
      case OMPC_SCHEDULE_runtime:
      case OMPC_SCHEDULE_guided:
      case OMPC_SCHEDULE_static:
        continue;
      case OMPC_SCHEDULE_unknown:
        return false;
      }
    }

    return false;
  }

  return true;
}

static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
  switch (ScheduleClauseKind) {
  case OMPC_SCHEDULE_unknown:
    return llvm::omp::OMP_SCHEDULE_Default;
  case OMPC_SCHEDULE_auto:
    return llvm::omp::OMP_SCHEDULE_Auto;
  case OMPC_SCHEDULE_dynamic:
    return llvm::omp::OMP_SCHEDULE_Dynamic;
  case OMPC_SCHEDULE_guided:
    return llvm::omp::OMP_SCHEDULE_Guided;
  case OMPC_SCHEDULE_runtime:
    return llvm::omp::OMP_SCHEDULE_Runtime;
  case OMPC_SCHEDULE_static:
    return llvm::omp::OMP_SCHEDULE_Static;
  }
  llvm_unreachable("Unhandled schedule kind");
}
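
// For illustration (not in the original source): '#pragma omp for nowait
// schedule(dynamic)' passes the check above (only nowait/bind and a plain
// schedule clause without monotonic/nonmonotonic modifiers are tolerated),
// whereas adding, say, 'collapse(2)' or 'lastprivate(x)' hits the final
// 'return false' and forces the classic codegen path.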

// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
// available for "loop bind(parallel)", which maps to "for".
static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
                                CodeGenModule &CGM, bool HasCancel) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
                         isForSupportedByOpenMPIRBuilder(S, HasCancel);
  auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = CGF.EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
      cantFail(OMPBuilder.applyWorkshareLoop(
          CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false));
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_for, CodeGen,
                                                HasCancel);
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  return emitOMPForDirective(S, *this, CGM, S.hasCancel());
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
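
// For illustration (not in the original source): for '#pragma omp for' the
// implicit end-of-construct barrier is emitted here unless 'nowait' is given;
// with 'nowait' plus 'lastprivate' the barrier is still emitted, because the
// final lastprivate copies must complete before threads race ahead.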
4294 if (HasLastprivates) 4295 CGF.EmitOMPLastprivateClauseFinal( 4296 S, /*NoFinals=*/false, 4297 CGF.Builder.CreateIsNotNull( 4298 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 4299 }; 4300 4301 bool HasCancel = false; 4302 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 4303 HasCancel = OSD->hasCancel(); 4304 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 4305 HasCancel = OPSD->hasCancel(); 4306 OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel); 4307 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 4308 HasCancel); 4309 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 4310 // clause. Otherwise the barrier will be generated by the codegen for the 4311 // directive. 4312 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 4313 // Emit implicit barrier to synchronize threads and avoid data races on 4314 // initialization of firstprivate variables. 4315 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4316 OMPD_unknown); 4317 } 4318 } 4319 4320 void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) { 4321 { 4322 // Emit code for 'scope' region 4323 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4324 Action.Enter(CGF); 4325 OMPPrivateScope PrivateScope(CGF); 4326 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4327 CGF.EmitOMPPrivateClause(S, PrivateScope); 4328 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4329 (void)PrivateScope.Privatize(); 4330 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4331 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4332 }; 4333 auto LPCRegion = 4334 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4335 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4336 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_scope, CodeGen); 4337 } 4338 // Emit an implicit barrier at the end. 4339 if (!S.getSingleClause<OMPNowaitClause>()) { 4340 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_scope); 4341 } 4342 // Check for outer lastprivate conditional update. 4343 checkForLastprivateConditionalUpdate(*this, S); 4344 } 4345 4346 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 4347 if (CGM.getLangOpts().OpenMPIRBuilder) { 4348 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4349 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4350 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 4351 4352 auto FiniCB = [](InsertPointTy IP) { 4353 // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for 4354 // sections. 
4355 return llvm::Error::success(); 4356 }; 4357 4358 const CapturedStmt *ICS = S.getInnermostCapturedStmt(); 4359 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 4360 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 4361 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 4362 if (CS) { 4363 for (const Stmt *SubStmt : CS->children()) { 4364 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, 4365 InsertPointTy CodeGenIP) { 4366 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4367 *this, SubStmt, AllocaIP, CodeGenIP, "section"); 4368 return llvm::Error::success(); 4369 }; 4370 SectionCBVector.push_back(SectionCB); 4371 } 4372 } else { 4373 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, 4374 InsertPointTy CodeGenIP) { 4375 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4376 *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); 4377 return llvm::Error::success(); 4378 }; 4379 SectionCBVector.push_back(SectionCB); 4380 } 4381 4382 // Privatization callback that performs appropriate action for 4383 // shared/private/firstprivate/lastprivate/copyin/... variables. 4384 // 4385 // TODO: This defaults to shared right now. 4386 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 4387 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 4388 // The next line is appropriate only for variables (Val) with the 4389 // data-sharing attribute "shared". 4390 ReplVal = &Val; 4391 4392 return CodeGenIP; 4393 }; 4394 4395 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); 4396 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 4397 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 4398 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 4399 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4400 cantFail(OMPBuilder.createSections( 4401 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), 4402 S.getSingleClause<OMPNowaitClause>())); 4403 Builder.restoreIP(AfterIP); 4404 return; 4405 } 4406 { 4407 auto LPCRegion = 4408 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4409 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4410 EmitSections(S); 4411 } 4412 // Emit an implicit barrier at the end. 4413 if (!S.getSingleClause<OMPNowaitClause>()) { 4414 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4415 OMPD_sections); 4416 } 4417 // Check for outer lastprivate conditional update. 
4418 checkForLastprivateConditionalUpdate(*this, S); 4419 } 4420 4421 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 4422 if (CGM.getLangOpts().OpenMPIRBuilder) { 4423 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4424 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4425 4426 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); 4427 auto FiniCB = [this](InsertPointTy IP) { 4428 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4429 return llvm::Error::success(); 4430 }; 4431 4432 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, 4433 InsertPointTy CodeGenIP) { 4434 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4435 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); 4436 return llvm::Error::success(); 4437 }; 4438 4439 LexicalScope Scope(*this, S.getSourceRange()); 4440 EmitStopPoint(&S); 4441 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4442 cantFail(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); 4443 Builder.restoreIP(AfterIP); 4444 4445 return; 4446 } 4447 LexicalScope Scope(*this, S.getSourceRange()); 4448 EmitStopPoint(&S); 4449 EmitStmt(S.getAssociatedStmt()); 4450 } 4451 4452 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 4453 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 4454 llvm::SmallVector<const Expr *, 8> DestExprs; 4455 llvm::SmallVector<const Expr *, 8> SrcExprs; 4456 llvm::SmallVector<const Expr *, 8> AssignmentOps; 4457 // Check if there are any 'copyprivate' clauses associated with this 4458 // 'single' construct. 4459 // Build a list of copyprivate variables along with helper expressions 4460 // (<source>, <destination>, <destination>=<source> expressions) 4461 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 4462 CopyprivateVars.append(C->varlist_begin(), C->varlist_end()); 4463 DestExprs.append(C->destination_exprs().begin(), 4464 C->destination_exprs().end()); 4465 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 4466 AssignmentOps.append(C->assignment_ops().begin(), 4467 C->assignment_ops().end()); 4468 } 4469 // Emit code for 'single' region along with 'copyprivate' clauses 4470 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4471 Action.Enter(CGF); 4472 OMPPrivateScope SingleScope(CGF); 4473 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 4474 CGF.EmitOMPPrivateClause(S, SingleScope); 4475 (void)SingleScope.Privatize(); 4476 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4477 }; 4478 { 4479 auto LPCRegion = 4480 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4481 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4482 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 4483 CopyprivateVars, DestExprs, 4484 SrcExprs, AssignmentOps); 4485 } 4486 // Emit an implicit barrier at the end (to avoid data race on firstprivate 4487 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 4488 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 4489 CGM.getOpenMPRuntime().emitBarrierCall( 4490 *this, S.getBeginLoc(), 4491 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 4492 } 4493 // Check for outer lastprivate conditional update. 
4494 checkForLastprivateConditionalUpdate(*this, S); 4495 } 4496 4497 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4498 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4499 Action.Enter(CGF); 4500 CGF.EmitStmt(S.getRawStmt()); 4501 }; 4502 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 4503 } 4504 4505 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 4506 if (CGM.getLangOpts().OpenMPIRBuilder) { 4507 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4508 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4509 4510 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); 4511 4512 auto FiniCB = [this](InsertPointTy IP) { 4513 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4514 return llvm::Error::success(); 4515 }; 4516 4517 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, 4518 InsertPointTy CodeGenIP) { 4519 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4520 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); 4521 return llvm::Error::success(); 4522 }; 4523 4524 LexicalScope Scope(*this, S.getSourceRange()); 4525 EmitStopPoint(&S); 4526 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4527 cantFail(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 4528 Builder.restoreIP(AfterIP); 4529 4530 return; 4531 } 4532 LexicalScope Scope(*this, S.getSourceRange()); 4533 EmitStopPoint(&S); 4534 emitMaster(*this, S); 4535 } 4536 4537 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4538 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4539 Action.Enter(CGF); 4540 CGF.EmitStmt(S.getRawStmt()); 4541 }; 4542 Expr *Filter = nullptr; 4543 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4544 Filter = FilterClause->getThreadID(); 4545 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), 4546 Filter); 4547 } 4548 4549 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { 4550 if (CGM.getLangOpts().OpenMPIRBuilder) { 4551 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4552 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4553 4554 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); 4555 const Expr *Filter = nullptr; 4556 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4557 Filter = FilterClause->getThreadID(); 4558 llvm::Value *FilterVal = Filter 4559 ? 
EmitScalarExpr(Filter, CGM.Int32Ty) 4560 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 4561 4562 auto FiniCB = [this](InsertPointTy IP) { 4563 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4564 return llvm::Error::success(); 4565 }; 4566 4567 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, 4568 InsertPointTy CodeGenIP) { 4569 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4570 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); 4571 return llvm::Error::success(); 4572 }; 4573 4574 LexicalScope Scope(*this, S.getSourceRange()); 4575 EmitStopPoint(&S); 4576 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 4577 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); 4578 Builder.restoreIP(AfterIP); 4579 4580 return; 4581 } 4582 LexicalScope Scope(*this, S.getSourceRange()); 4583 EmitStopPoint(&S); 4584 emitMasked(*this, S); 4585 } 4586 4587 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 4588 if (CGM.getLangOpts().OpenMPIRBuilder) { 4589 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4590 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4591 4592 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); 4593 const Expr *Hint = nullptr; 4594 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4595 Hint = HintClause->getHint(); 4596 4597 // TODO: This is slightly different from what's currently being done in 4598 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything 4599 // about typing is final. 4600 llvm::Value *HintInst = nullptr; 4601 if (Hint) 4602 HintInst = 4603 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); 4604 4605 auto FiniCB = [this](InsertPointTy IP) { 4606 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4607 return llvm::Error::success(); 4608 }; 4609 4610 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, 4611 InsertPointTy CodeGenIP) { 4612 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4613 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); 4614 return llvm::Error::success(); 4615 }; 4616 4617 LexicalScope Scope(*this, S.getSourceRange()); 4618 EmitStopPoint(&S); 4619 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4620 cantFail(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, 4621 S.getDirectiveName().getAsString(), 4622 HintInst)); 4623 Builder.restoreIP(AfterIP); 4624 4625 return; 4626 } 4627 4628 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4629 Action.Enter(CGF); 4630 CGF.EmitStmt(S.getAssociatedStmt()); 4631 }; 4632 const Expr *Hint = nullptr; 4633 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4634 Hint = HintClause->getHint(); 4635 LexicalScope Scope(*this, S.getSourceRange()); 4636 EmitStopPoint(&S); 4637 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 4638 S.getDirectiveName().getAsString(), 4639 CodeGen, S.getBeginLoc(), Hint); 4640 } 4641 4642 void CodeGenFunction::EmitOMPParallelForDirective( 4643 const OMPParallelForDirective &S) { 4644 // Emit directive as a combined directive that consists of two implicit 4645 // directives: 'parallel' with 'for' directive. 
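// Illustrative sketch of the decomposition (not the verbatim output):
//   #pragma omp parallel for reduction(+ : Sum)
//   for (int I = 0; I < N; ++I)
//     Sum += A[I];
// is emitted as a 'parallel' region whose outlined function runs the 'for'
// worksharing loop (e.g. via a __kmpc_for_static_init_* call when the
// schedule is static), with the reduction combined on exit from the loop.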
4646 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4647 Action.Enter(CGF);
4648 emitOMPCopyinClause(CGF, S);
4649 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4650 };
4651 {
4652 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4653 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4654 CGCapturedStmtInfo CGSI(CR_OpenMP);
4655 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4656 OMPLoopScope LoopScope(CGF, S);
4657 return CGF.EmitScalarExpr(S.getNumIterations());
4658 };
4659 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4660 [](const OMPReductionClause *C) {
4661 return C->getModifier() == OMPC_REDUCTION_inscan;
4662 });
4663 if (IsInscan)
4664 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4665 auto LPCRegion =
4666 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4667 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4668 emitEmptyBoundParameters);
4669 if (IsInscan)
4670 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4671 }
4672 // Check for outer lastprivate conditional update.
4673 checkForLastprivateConditionalUpdate(*this, S);
4674 }
4675
4676 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4677 const OMPParallelForSimdDirective &S) {
4678 // Emit directive as a combined directive that consists of two implicit
4679 // directives: 'parallel' with 'for simd' directive.
4680 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4681 Action.Enter(CGF);
4682 emitOMPCopyinClause(CGF, S);
4683 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4684 };
4685 {
4686 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4687 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4688 CGCapturedStmtInfo CGSI(CR_OpenMP);
4689 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4690 OMPLoopScope LoopScope(CGF, S);
4691 return CGF.EmitScalarExpr(S.getNumIterations());
4692 };
4693 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4694 [](const OMPReductionClause *C) {
4695 return C->getModifier() == OMPC_REDUCTION_inscan;
4696 });
4697 if (IsInscan)
4698 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4699 auto LPCRegion =
4700 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4701 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4702 emitEmptyBoundParameters);
4703 if (IsInscan)
4704 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4705 }
4706 // Check for outer lastprivate conditional update.
4707 checkForLastprivateConditionalUpdate(*this, S);
4708 }
4709
4710 void CodeGenFunction::EmitOMPParallelMasterDirective(
4711 const OMPParallelMasterDirective &S) {
4712 // Emit directive as a combined directive that consists of two implicit
4713 // directives: 'parallel' with 'master' directive.
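// Source-level view of the combination (sketch only):
//   #pragma omp parallel master
//   { Body(); }
// is handled as a 'parallel' region that immediately enters a 'master'
// region, so Body() is executed only by the primary thread of the team.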
4714 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4715 Action.Enter(CGF); 4716 OMPPrivateScope PrivateScope(CGF); 4717 emitOMPCopyinClause(CGF, S); 4718 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4719 CGF.EmitOMPPrivateClause(S, PrivateScope); 4720 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4721 (void)PrivateScope.Privatize(); 4722 emitMaster(CGF, S); 4723 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4724 }; 4725 { 4726 auto LPCRegion = 4727 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4728 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, 4729 emitEmptyBoundParameters); 4730 emitPostUpdateForReductionClause(*this, S, 4731 [](CodeGenFunction &) { return nullptr; }); 4732 } 4733 // Check for outer lastprivate conditional update. 4734 checkForLastprivateConditionalUpdate(*this, S); 4735 } 4736 4737 void CodeGenFunction::EmitOMPParallelMaskedDirective( 4738 const OMPParallelMaskedDirective &S) { 4739 // Emit directive as a combined directive that consists of two implicit 4740 // directives: 'parallel' with 'masked' directive. 4741 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4742 Action.Enter(CGF); 4743 OMPPrivateScope PrivateScope(CGF); 4744 emitOMPCopyinClause(CGF, S); 4745 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4746 CGF.EmitOMPPrivateClause(S, PrivateScope); 4747 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4748 (void)PrivateScope.Privatize(); 4749 emitMasked(CGF, S); 4750 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4751 }; 4752 { 4753 auto LPCRegion = 4754 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4755 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, 4756 emitEmptyBoundParameters); 4757 emitPostUpdateForReductionClause(*this, S, 4758 [](CodeGenFunction &) { return nullptr; }); 4759 } 4760 // Check for outer lastprivate conditional update. 4761 checkForLastprivateConditionalUpdate(*this, S); 4762 } 4763 4764 void CodeGenFunction::EmitOMPParallelSectionsDirective( 4765 const OMPParallelSectionsDirective &S) { 4766 // Emit directive as a combined directive that consists of two implicit 4767 // directives: 'parallel' with 'sections' directive. 4768 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4769 Action.Enter(CGF); 4770 emitOMPCopyinClause(CGF, S); 4771 CGF.EmitSections(S); 4772 }; 4773 { 4774 auto LPCRegion = 4775 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4776 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 4777 emitEmptyBoundParameters); 4778 } 4779 // Check for outer lastprivate conditional update. 4780 checkForLastprivateConditionalUpdate(*this, S); 4781 } 4782 4783 namespace { 4784 /// Get the list of variables declared in the context of the untied tasks. 4785 class CheckVarsEscapingUntiedTaskDeclContext final 4786 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { 4787 llvm::SmallVector<const VarDecl *, 4> PrivateDecls; 4788 4789 public: 4790 explicit CheckVarsEscapingUntiedTaskDeclContext() = default; 4791 ~CheckVarsEscapingUntiedTaskDeclContext() = default; 4792 void VisitDeclStmt(const DeclStmt *S) { 4793 if (!S) 4794 return; 4795 // Need to privatize only local vars, static locals can be processed as is. 
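// Illustrative example of why this is needed:
//   #pragma omp task untied
//   { int I = 0; /* ... task scheduling point ... */ Use(I); }
// An untied task may be resumed by a different thread after a scheduling
// point, so automatic locals such as I must live in the task's private data
// area; static locals keep one persistent instance and can be left alone.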
4796 for (const Decl *D : S->decls()) { 4797 if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) 4798 if (VD->hasLocalStorage()) 4799 PrivateDecls.push_back(VD); 4800 } 4801 } 4802 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} 4803 void VisitCapturedStmt(const CapturedStmt *) {} 4804 void VisitLambdaExpr(const LambdaExpr *) {} 4805 void VisitBlockExpr(const BlockExpr *) {} 4806 void VisitStmt(const Stmt *S) { 4807 if (!S) 4808 return; 4809 for (const Stmt *Child : S->children()) 4810 if (Child) 4811 Visit(Child); 4812 } 4813 4814 /// Swaps list of vars with the provided one. 4815 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } 4816 }; 4817 } // anonymous namespace 4818 4819 static void buildDependences(const OMPExecutableDirective &S, 4820 OMPTaskDataTy &Data) { 4821 4822 // First look for 'omp_all_memory' and add this first. 4823 bool OmpAllMemory = false; 4824 if (llvm::any_of( 4825 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) { 4826 return C->getDependencyKind() == OMPC_DEPEND_outallmemory || 4827 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; 4828 })) { 4829 OmpAllMemory = true; 4830 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are 4831 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to 4832 // simplify. 4833 OMPTaskDataTy::DependData &DD = 4834 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory, 4835 /*IteratorExpr=*/nullptr); 4836 // Add a nullptr Expr to simplify the codegen in emitDependData. 4837 DD.DepExprs.push_back(nullptr); 4838 } 4839 // Add remaining dependences skipping any 'out' or 'inout' if they are 4840 // overridden by 'omp_all_memory'. 4841 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 4842 OpenMPDependClauseKind Kind = C->getDependencyKind(); 4843 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) 4844 continue; 4845 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) 4846 continue; 4847 OMPTaskDataTy::DependData &DD = 4848 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); 4849 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); 4850 } 4851 } 4852 4853 void CodeGenFunction::EmitOMPTaskBasedDirective( 4854 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 4855 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 4856 OMPTaskDataTy &Data) { 4857 // Emit outlined function for task construct. 4858 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); 4859 auto I = CS->getCapturedDecl()->param_begin(); 4860 auto PartId = std::next(I); 4861 auto TaskT = std::next(I, 4); 4862 // Check if the task is final 4863 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 4864 // If the condition constant folds and can be elided, try to avoid emitting 4865 // the condition and the dead arm of the if/else. 4866 const Expr *Cond = Clause->getCondition(); 4867 bool CondConstant; 4868 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 4869 Data.Final.setInt(CondConstant); 4870 else 4871 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 4872 } else { 4873 // By default the task is not final. 4874 Data.Final.setInt(/*IntVal=*/false); 4875 } 4876 // Check if the task has 'priority' clause. 
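// e.g. for `#pragma omp task priority(P)` the value of P is converted to a
// 32-bit signed integer below and stashed in Data.Priority so the runtime
// can take it into account when the task is created (sketch of the intent;
// the actual runtime call is emitted in CGOpenMPRuntime).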
4877 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 4878 const Expr *Prio = Clause->getPriority(); 4879 Data.Priority.setInt(/*IntVal=*/true); 4880 Data.Priority.setPointer(EmitScalarConversion( 4881 EmitScalarExpr(Prio), Prio->getType(), 4882 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 4883 Prio->getExprLoc())); 4884 } 4885 // The first function argument for tasks is a thread id, the second one is a 4886 // part id (0 for tied tasks, >=0 for untied task). 4887 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 4888 // Get list of private variables. 4889 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 4890 auto IRef = C->varlist_begin(); 4891 for (const Expr *IInit : C->private_copies()) { 4892 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4893 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4894 Data.PrivateVars.push_back(*IRef); 4895 Data.PrivateCopies.push_back(IInit); 4896 } 4897 ++IRef; 4898 } 4899 } 4900 EmittedAsPrivate.clear(); 4901 // Get list of firstprivate variables. 4902 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 4903 auto IRef = C->varlist_begin(); 4904 auto IElemInitRef = C->inits().begin(); 4905 for (const Expr *IInit : C->private_copies()) { 4906 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4907 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4908 Data.FirstprivateVars.push_back(*IRef); 4909 Data.FirstprivateCopies.push_back(IInit); 4910 Data.FirstprivateInits.push_back(*IElemInitRef); 4911 } 4912 ++IRef; 4913 ++IElemInitRef; 4914 } 4915 } 4916 // Get list of lastprivate variables (for taskloops). 4917 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 4918 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 4919 auto IRef = C->varlist_begin(); 4920 auto ID = C->destination_exprs().begin(); 4921 for (const Expr *IInit : C->private_copies()) { 4922 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4923 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4924 Data.LastprivateVars.push_back(*IRef); 4925 Data.LastprivateCopies.push_back(IInit); 4926 } 4927 LastprivateDstsOrigs.insert( 4928 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 4929 cast<DeclRefExpr>(*IRef))); 4930 ++IRef; 4931 ++ID; 4932 } 4933 } 4934 SmallVector<const Expr *, 4> LHSs; 4935 SmallVector<const Expr *, 4> RHSs; 4936 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 4937 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 4938 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 4939 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 4940 Data.ReductionOps.append(C->reduction_ops().begin(), 4941 C->reduction_ops().end()); 4942 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 4943 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 4944 } 4945 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 4946 *this, S.getBeginLoc(), LHSs, RHSs, Data); 4947 // Build list of dependences. 4948 buildDependences(S, Data); 4949 // Get list of local vars for untied tasks. 
4950 if (!Data.Tied) { 4951 CheckVarsEscapingUntiedTaskDeclContext Checker; 4952 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); 4953 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), 4954 Checker.getPrivateDecls().end()); 4955 } 4956 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 4957 CapturedRegion](CodeGenFunction &CGF, 4958 PrePostActionTy &Action) { 4959 llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 4960 std::pair<Address, Address>> 4961 UntiedLocalVars; 4962 // Set proper addresses for generated private copies. 4963 OMPPrivateScope Scope(CGF); 4964 // Generate debug info for variables present in shared clause. 4965 if (auto *DI = CGF.getDebugInfo()) { 4966 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = 4967 CGF.CapturedStmtInfo->getCaptureFields(); 4968 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); 4969 if (CaptureFields.size() && ContextValue) { 4970 unsigned CharWidth = CGF.getContext().getCharWidth(); 4971 // The shared variables are packed together as members of structure. 4972 // So the address of each shared variable can be computed by adding 4973 // offset of it (within record) to the base address of record. For each 4974 // shared variable, debug intrinsic llvm.dbg.declare is generated with 4975 // appropriate expressions (DIExpression). 4976 // Ex: 4977 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i 4978 // call void @llvm.dbg.declare(metadata %struct.anon* %12, 4979 // metadata !svar1, 4980 // metadata !DIExpression(DW_OP_deref)) 4981 // call void @llvm.dbg.declare(metadata %struct.anon* %12, 4982 // metadata !svar2, 4983 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) 4984 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { 4985 const VarDecl *SharedVar = It->first; 4986 RecordDecl *CaptureRecord = It->second->getParent(); 4987 const ASTRecordLayout &Layout = 4988 CGF.getContext().getASTRecordLayout(CaptureRecord); 4989 unsigned Offset = 4990 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; 4991 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) 4992 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, 4993 CGF.Builder, false); 4994 // Get the call dbg.declare instruction we just created and update 4995 // its DIExpression to add offset to base address. 4996 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare, 4997 unsigned Offset) { 4998 SmallVector<uint64_t, 8> Ops; 4999 // Add offset to the base address if non zero. 5000 if (Offset) { 5001 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); 5002 Ops.push_back(Offset); 5003 } 5004 Ops.push_back(llvm::dwarf::DW_OP_deref); 5005 Declare->setExpression(llvm::DIExpression::get(Ctx, Ops)); 5006 }; 5007 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); 5008 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) 5009 UpdateExpr(DDI->getContext(), DDI, Offset); 5010 // If we're emitting using the new debug info format into a block 5011 // without a terminator, the record will be "trailing". 
5012 assert(!Last.isTerminator() && "unexpected terminator"); 5013 if (auto *Marker = 5014 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) { 5015 for (llvm::DbgVariableRecord &DVR : llvm::reverse( 5016 llvm::filterDbgVars(Marker->getDbgRecordRange()))) { 5017 UpdateExpr(Last.getContext(), &DVR, Offset); 5018 break; 5019 } 5020 } 5021 } 5022 } 5023 } 5024 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; 5025 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 5026 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { 5027 enum { PrivatesParam = 2, CopyFnParam = 3 }; 5028 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 5029 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 5030 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 5031 CS->getCapturedDecl()->getParam(PrivatesParam))); 5032 // Map privates. 5033 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 5034 llvm::SmallVector<llvm::Value *, 16> CallArgs; 5035 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 5036 CallArgs.push_back(PrivatesPtr); 5037 ParamTypes.push_back(PrivatesPtr->getType()); 5038 for (const Expr *E : Data.PrivateVars) { 5039 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5040 RawAddress PrivatePtr = CGF.CreateMemTemp( 5041 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 5042 PrivatePtrs.emplace_back(VD, PrivatePtr); 5043 CallArgs.push_back(PrivatePtr.getPointer()); 5044 ParamTypes.push_back(PrivatePtr.getType()); 5045 } 5046 for (const Expr *E : Data.FirstprivateVars) { 5047 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5048 RawAddress PrivatePtr = 5049 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 5050 ".firstpriv.ptr.addr"); 5051 PrivatePtrs.emplace_back(VD, PrivatePtr); 5052 FirstprivatePtrs.emplace_back(VD, PrivatePtr); 5053 CallArgs.push_back(PrivatePtr.getPointer()); 5054 ParamTypes.push_back(PrivatePtr.getType()); 5055 } 5056 for (const Expr *E : Data.LastprivateVars) { 5057 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5058 RawAddress PrivatePtr = 5059 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 5060 ".lastpriv.ptr.addr"); 5061 PrivatePtrs.emplace_back(VD, PrivatePtr); 5062 CallArgs.push_back(PrivatePtr.getPointer()); 5063 ParamTypes.push_back(PrivatePtr.getType()); 5064 } 5065 for (const VarDecl *VD : Data.PrivateLocals) { 5066 QualType Ty = VD->getType().getNonReferenceType(); 5067 if (VD->getType()->isLValueReferenceType()) 5068 Ty = CGF.getContext().getPointerType(Ty); 5069 if (isAllocatableDecl(VD)) 5070 Ty = CGF.getContext().getPointerType(Ty); 5071 RawAddress PrivatePtr = CGF.CreateMemTemp( 5072 CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); 5073 auto Result = UntiedLocalVars.insert( 5074 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); 5075 // If key exists update in place. 
5076 if (!Result.second)
5077 *Result.first = std::make_pair(
5078 VD, std::make_pair(PrivatePtr, Address::invalid()));
5079 CallArgs.push_back(PrivatePtr.getPointer());
5080 ParamTypes.push_back(PrivatePtr.getType());
5081 }
5082 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5083 ParamTypes, /*isVarArg=*/false);
5084 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5085 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
5086 for (const auto &Pair : LastprivateDstsOrigs) {
5087 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
5088 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
5089 /*RefersToEnclosingVariableOrCapture=*/
5090 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
5091 Pair.second->getType(), VK_LValue,
5092 Pair.second->getExprLoc());
5093 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
5094 }
5095 for (const auto &Pair : PrivatePtrs) {
5096 Address Replacement = Address(
5097 CGF.Builder.CreateLoad(Pair.second),
5098 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5099 CGF.getContext().getDeclAlign(Pair.first));
5100 Scope.addPrivate(Pair.first, Replacement);
5101 if (auto *DI = CGF.getDebugInfo())
5102 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5103 (void)DI->EmitDeclareOfAutoVariable(
5104 Pair.first, Pair.second.getBasePointer(), CGF.Builder,
5105 /*UsePointerValue*/ true);
5106 }
5107 // Adjust mapping for internal locals by mapping actual memory instead of
5108 // a pointer to this memory.
5109 for (auto &Pair : UntiedLocalVars) {
5110 QualType VDType = Pair.first->getType().getNonReferenceType();
5111 if (Pair.first->getType()->isLValueReferenceType())
5112 VDType = CGF.getContext().getPointerType(VDType);
5113 if (isAllocatableDecl(Pair.first)) {
5114 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
5115 Address Replacement(
5116 Ptr,
5117 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
5118 CGF.getPointerAlign());
5119 Pair.second.first = Replacement;
5120 Ptr = CGF.Builder.CreateLoad(Replacement);
5121 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
5122 CGF.getContext().getDeclAlign(Pair.first));
5123 Pair.second.second = Replacement;
5124 } else {
5125 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
5126 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
5127 CGF.getContext().getDeclAlign(Pair.first));
5128 Pair.second.first = Replacement;
5129 }
5130 }
5131 }
5132 if (Data.Reductions) {
5133 OMPPrivateScope FirstprivateScope(CGF);
5134 for (const auto &Pair : FirstprivatePtrs) {
5135 Address Replacement(
5136 CGF.Builder.CreateLoad(Pair.second),
5137 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5138 CGF.getContext().getDeclAlign(Pair.first));
5139 FirstprivateScope.addPrivate(Pair.first, Replacement);
5140 }
5141 (void)FirstprivateScope.Privatize();
5142 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5143 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5144 Data.ReductionCopies, Data.ReductionOps);
5145 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5146 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
5147 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5148 RedCG.emitSharedOrigLValue(CGF, Cnt);
5149 RedCG.emitAggregateType(CGF, Cnt);
5150 // FIXME: This must be removed once the runtime library is fixed.
5151 // Emit required threadprivate variables for
5152 // initializer/combiner/finalizer.
5153 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5154 RedCG, Cnt);
5155 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5156 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5157 Replacement = Address(
5158 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5159 CGF.getContext().VoidPtrTy,
5160 CGF.getContext().getPointerType(
5161 Data.ReductionCopies[Cnt]->getType()),
5162 Data.ReductionCopies[Cnt]->getExprLoc()),
5163 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5164 Replacement.getAlignment());
5165 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5166 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5167 }
5168 }
5169 // Privatize all private variables except for in_reduction items.
5170 (void)Scope.Privatize();
5171 SmallVector<const Expr *, 4> InRedVars;
5172 SmallVector<const Expr *, 4> InRedPrivs;
5173 SmallVector<const Expr *, 4> InRedOps;
5174 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5175 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5176 auto IPriv = C->privates().begin();
5177 auto IRed = C->reduction_ops().begin();
5178 auto ITD = C->taskgroup_descriptors().begin();
5179 for (const Expr *Ref : C->varlist()) {
5180 InRedVars.emplace_back(Ref);
5181 InRedPrivs.emplace_back(*IPriv);
5182 InRedOps.emplace_back(*IRed);
5183 TaskgroupDescriptors.emplace_back(*ITD);
5184 std::advance(IPriv, 1);
5185 std::advance(IRed, 1);
5186 std::advance(ITD, 1);
5187 }
5188 }
5189 // Privatize in_reduction items here, because taskgroup descriptors must be
5190 // privatized earlier.
5191 OMPPrivateScope InRedScope(CGF);
5192 if (!InRedVars.empty()) {
5193 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5194 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5195 RedCG.emitSharedOrigLValue(CGF, Cnt);
5196 RedCG.emitAggregateType(CGF, Cnt);
5197 // The taskgroup descriptor variable is always implicitly firstprivate
5198 // and was privatized already during processing of the firstprivates.
5199 // FIXME: This must be removed once the runtime library is fixed.
5200 // Emit required threadprivate variables for
5201 // initializer/combiner/finalizer.
5202 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5203 RedCG, Cnt); 5204 llvm::Value *ReductionsPtr; 5205 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5206 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), 5207 TRExpr->getExprLoc()); 5208 } else { 5209 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5210 } 5211 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5212 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5213 Replacement = Address( 5214 CGF.EmitScalarConversion( 5215 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy, 5216 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5217 InRedPrivs[Cnt]->getExprLoc()), 5218 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5219 Replacement.getAlignment()); 5220 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5221 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5222 } 5223 } 5224 (void)InRedScope.Privatize(); 5225 5226 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, 5227 UntiedLocalVars); 5228 Action.Enter(CGF); 5229 BodyGen(CGF); 5230 }; 5231 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 5232 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5233 S, *I, *PartId, *TaskT, EKind, CodeGen, Data.Tied, Data.NumberOfParts); 5234 OMPLexicalScope Scope(*this, S, std::nullopt, 5235 !isOpenMPParallelDirective(EKind) && 5236 !isOpenMPSimdDirective(EKind)); 5237 TaskGen(*this, OutlinedFn, Data); 5238 } 5239 5240 static ImplicitParamDecl * 5241 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 5242 QualType Ty, CapturedDecl *CD, 5243 SourceLocation Loc) { 5244 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 5245 ImplicitParamKind::Other); 5246 auto *OrigRef = DeclRefExpr::Create( 5247 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 5248 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5249 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 5250 ImplicitParamKind::Other); 5251 auto *PrivateRef = DeclRefExpr::Create( 5252 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 5253 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5254 QualType ElemType = C.getBaseElementType(Ty); 5255 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 5256 ImplicitParamKind::Other); 5257 auto *InitRef = DeclRefExpr::Create( 5258 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 5259 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 5260 PrivateVD->setInitStyle(VarDecl::CInit); 5261 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 5262 InitRef, /*BasePath=*/nullptr, 5263 VK_PRValue, FPOptionsOverride())); 5264 Data.FirstprivateVars.emplace_back(OrigRef); 5265 Data.FirstprivateCopies.emplace_back(PrivateRef); 5266 Data.FirstprivateInits.emplace_back(InitRef); 5267 return OrigVD; 5268 } 5269 5270 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 5271 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 5272 OMPTargetDataInfo &InputInfo) { 5273 // Emit outlined function for task construct. 
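// This entry point is used for device directives wrapped in an implicit
// task, e.g. (illustrative only):
//   #pragma omp target nowait map(tofrom : A[0:N])
// The offloading base-pointer/pointer/size arrays (and the mapper array,
// when present) are threaded into that task as implicit firstprivates
// created via createImplicitFirstprivateForType above.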
5274 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5275 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5276 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5277 auto I = CS->getCapturedDecl()->param_begin(); 5278 auto PartId = std::next(I); 5279 auto TaskT = std::next(I, 4); 5280 OMPTaskDataTy Data; 5281 // The task is not final. 5282 Data.Final.setInt(/*IntVal=*/false); 5283 // Get list of firstprivate variables. 5284 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 5285 auto IRef = C->varlist_begin(); 5286 auto IElemInitRef = C->inits().begin(); 5287 for (auto *IInit : C->private_copies()) { 5288 Data.FirstprivateVars.push_back(*IRef); 5289 Data.FirstprivateCopies.push_back(IInit); 5290 Data.FirstprivateInits.push_back(*IElemInitRef); 5291 ++IRef; 5292 ++IElemInitRef; 5293 } 5294 } 5295 SmallVector<const Expr *, 4> LHSs; 5296 SmallVector<const Expr *, 4> RHSs; 5297 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 5298 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5299 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5300 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5301 Data.ReductionOps.append(C->reduction_ops().begin(), 5302 C->reduction_ops().end()); 5303 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5304 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5305 } 5306 OMPPrivateScope TargetScope(*this); 5307 VarDecl *BPVD = nullptr; 5308 VarDecl *PVD = nullptr; 5309 VarDecl *SVD = nullptr; 5310 VarDecl *MVD = nullptr; 5311 if (InputInfo.NumberOfTargetItems > 0) { 5312 auto *CD = CapturedDecl::Create( 5313 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 5314 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 5315 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( 5316 getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal, 5317 /*IndexTypeQuals=*/0); 5318 BPVD = createImplicitFirstprivateForType( 5319 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5320 PVD = createImplicitFirstprivateForType( 5321 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5322 QualType SizesType = getContext().getConstantArrayType( 5323 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 5324 ArrSize, nullptr, ArraySizeModifier::Normal, 5325 /*IndexTypeQuals=*/0); 5326 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 5327 S.getBeginLoc()); 5328 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray); 5329 TargetScope.addPrivate(PVD, InputInfo.PointersArray); 5330 TargetScope.addPrivate(SVD, InputInfo.SizesArray); 5331 // If there is no user-defined mapper, the mapper array will be nullptr. In 5332 // this case, we don't need to privatize it. 5333 if (!isa_and_nonnull<llvm::ConstantPointerNull>( 5334 InputInfo.MappersArray.emitRawPointer(*this))) { 5335 MVD = createImplicitFirstprivateForType( 5336 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5337 TargetScope.addPrivate(MVD, InputInfo.MappersArray); 5338 } 5339 } 5340 (void)TargetScope.Privatize(); 5341 buildDependences(S, Data); 5342 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 5343 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind, 5344 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 5345 // Set proper addresses for generated private copies. 
5346 OMPPrivateScope Scope(CGF); 5347 if (!Data.FirstprivateVars.empty()) { 5348 enum { PrivatesParam = 2, CopyFnParam = 3 }; 5349 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 5350 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 5351 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 5352 CS->getCapturedDecl()->getParam(PrivatesParam))); 5353 // Map privates. 5354 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 5355 llvm::SmallVector<llvm::Value *, 16> CallArgs; 5356 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 5357 CallArgs.push_back(PrivatesPtr); 5358 ParamTypes.push_back(PrivatesPtr->getType()); 5359 for (const Expr *E : Data.FirstprivateVars) { 5360 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5361 RawAddress PrivatePtr = 5362 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 5363 ".firstpriv.ptr.addr"); 5364 PrivatePtrs.emplace_back(VD, PrivatePtr); 5365 CallArgs.push_back(PrivatePtr.getPointer()); 5366 ParamTypes.push_back(PrivatePtr.getType()); 5367 } 5368 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), 5369 ParamTypes, /*isVarArg=*/false); 5370 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 5371 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 5372 for (const auto &Pair : PrivatePtrs) { 5373 Address Replacement( 5374 CGF.Builder.CreateLoad(Pair.second), 5375 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), 5376 CGF.getContext().getDeclAlign(Pair.first)); 5377 Scope.addPrivate(Pair.first, Replacement); 5378 } 5379 } 5380 CGF.processInReduction(S, Data, CGF, CS, Scope); 5381 if (InputInfo.NumberOfTargetItems > 0) { 5382 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 5383 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 5384 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 5385 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 5386 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 5387 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 5388 // If MVD is nullptr, the mapper array is not privatized 5389 if (MVD) 5390 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( 5391 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); 5392 } 5393 5394 Action.Enter(CGF); 5395 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 5396 auto *TL = S.getSingleClause<OMPThreadLimitClause>(); 5397 if (CGF.CGM.getLangOpts().OpenMP >= 51 && 5398 needsTaskBasedThreadLimit(EKind) && TL) { 5399 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task 5400 // enclosing this target region. This will indirectly set the thread_limit 5401 // for every applicable construct within target region. 5402 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( 5403 CGF, TL->getThreadLimit().front(), S.getBeginLoc()); 5404 } 5405 BodyGen(CGF); 5406 }; 5407 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5408 S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true, 5409 Data.NumberOfParts); 5410 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 
1 : 0);
5411 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5412 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5413 SourceLocation());
5414 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5415 SharedsTy, CapturedStruct, &IfCond, Data);
5416 }
5417
5418 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5419 OMPTaskDataTy &Data,
5420 CodeGenFunction &CGF,
5421 const CapturedStmt *CS,
5422 OMPPrivateScope &Scope) {
5423 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5424 if (Data.Reductions) {
5425 OpenMPDirectiveKind CapturedRegion = EKind;
5426 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5427 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5428 Data.ReductionCopies, Data.ReductionOps);
5429 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5430 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5431 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5432 RedCG.emitSharedOrigLValue(CGF, Cnt);
5433 RedCG.emitAggregateType(CGF, Cnt);
5434 // FIXME: This must be removed once the runtime library is fixed.
5435 // Emit required threadprivate variables for
5436 // initializer/combiner/finalizer.
5437 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5438 RedCG, Cnt);
5439 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5440 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5441 Replacement = Address(
5442 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5443 CGF.getContext().VoidPtrTy,
5444 CGF.getContext().getPointerType(
5445 Data.ReductionCopies[Cnt]->getType()),
5446 Data.ReductionCopies[Cnt]->getExprLoc()),
5447 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5448 Replacement.getAlignment());
5449 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5450 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5451 }
5452 }
5453 (void)Scope.Privatize();
5454 SmallVector<const Expr *, 4> InRedVars;
5455 SmallVector<const Expr *, 4> InRedPrivs;
5456 SmallVector<const Expr *, 4> InRedOps;
5457 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5458 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5459 auto IPriv = C->privates().begin();
5460 auto IRed = C->reduction_ops().begin();
5461 auto ITD = C->taskgroup_descriptors().begin();
5462 for (const Expr *Ref : C->varlist()) {
5463 InRedVars.emplace_back(Ref);
5464 InRedPrivs.emplace_back(*IPriv);
5465 InRedOps.emplace_back(*IRed);
5466 TaskgroupDescriptors.emplace_back(*ITD);
5467 std::advance(IPriv, 1);
5468 std::advance(IRed, 1);
5469 std::advance(ITD, 1);
5470 }
5471 }
5472 OMPPrivateScope InRedScope(CGF);
5473 if (!InRedVars.empty()) {
5474 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5475 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5476 RedCG.emitSharedOrigLValue(CGF, Cnt);
5477 RedCG.emitAggregateType(CGF, Cnt);
5478 // FIXME: This must be removed once the runtime library is fixed.
5479 // Emit required threadprivate variables for
5480 // initializer/combiner/finalizer.
5481 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5482 RedCG, Cnt); 5483 llvm::Value *ReductionsPtr; 5484 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5485 ReductionsPtr = 5486 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc()); 5487 } else { 5488 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5489 } 5490 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5491 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5492 Replacement = Address( 5493 CGF.EmitScalarConversion( 5494 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy, 5495 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5496 InRedPrivs[Cnt]->getExprLoc()), 5497 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5498 Replacement.getAlignment()); 5499 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5500 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5501 } 5502 } 5503 (void)InRedScope.Privatize(); 5504 } 5505 5506 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 5507 // Emit outlined function for task construct. 5508 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5509 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5510 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5511 const Expr *IfCond = nullptr; 5512 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 5513 if (C->getNameModifier() == OMPD_unknown || 5514 C->getNameModifier() == OMPD_task) { 5515 IfCond = C->getCondition(); 5516 break; 5517 } 5518 } 5519 5520 OMPTaskDataTy Data; 5521 // Check if we should emit tied or untied task. 5522 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 5523 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 5524 CGF.EmitStmt(CS->getCapturedStmt()); 5525 }; 5526 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 5527 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 5528 const OMPTaskDataTy &Data) { 5529 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 5530 SharedsTy, CapturedStruct, IfCond, 5531 Data); 5532 }; 5533 auto LPCRegion = 5534 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5535 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 5536 } 5537 5538 void CodeGenFunction::EmitOMPTaskyieldDirective( 5539 const OMPTaskyieldDirective &S) { 5540 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 5541 } 5542 5543 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { 5544 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); 5545 Expr *ME = MC ? 
MC->getMessageString() : nullptr; 5546 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); 5547 bool IsFatal = false; 5548 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) 5549 IsFatal = true; 5550 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal); 5551 } 5552 5553 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 5554 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); 5555 } 5556 5557 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 5558 OMPTaskDataTy Data; 5559 // Build list of dependences 5560 buildDependences(S, Data); 5561 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); 5562 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); 5563 } 5564 5565 static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { 5566 return T.clauses().empty(); 5567 } 5568 5569 void CodeGenFunction::EmitOMPTaskgroupDirective( 5570 const OMPTaskgroupDirective &S) { 5571 OMPLexicalScope Scope(*this, S, OMPD_unknown); 5572 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) { 5573 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 5574 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 5575 InsertPointTy AllocaIP(AllocaInsertPt->getParent(), 5576 AllocaInsertPt->getIterator()); 5577 5578 auto BodyGenCB = [&, this](InsertPointTy AllocaIP, 5579 InsertPointTy CodeGenIP) { 5580 Builder.restoreIP(CodeGenIP); 5581 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 5582 return llvm::Error::success(); 5583 }; 5584 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 5585 if (!CapturedStmtInfo) 5586 CapturedStmtInfo = &CapStmtInfo; 5587 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 5588 cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); 5589 Builder.restoreIP(AfterIP); 5590 return; 5591 } 5592 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5593 Action.Enter(CGF); 5594 if (const Expr *E = S.getReductionRef()) { 5595 SmallVector<const Expr *, 4> LHSs; 5596 SmallVector<const Expr *, 4> RHSs; 5597 OMPTaskDataTy Data; 5598 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 5599 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5600 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5601 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5602 Data.ReductionOps.append(C->reduction_ops().begin(), 5603 C->reduction_ops().end()); 5604 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5605 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5606 } 5607 llvm::Value *ReductionDesc = 5608 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), 5609 LHSs, RHSs, Data); 5610 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5611 CGF.EmitVarDecl(*VD); 5612 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 5613 /*Volatile=*/false, E->getType()); 5614 } 5615 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 5616 }; 5617 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); 5618 } 5619 5620 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 5621 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() 5622 ? 
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(CodeGenIP);
      EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
      return llvm::Error::success();
    };
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
    Builder.restoreIP(AfterIP);
    return;
  }
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::ArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
        return {};
      }(),
      S.getBeginLoc(), AO);
}

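// An illustration (hypothetical user code) of the three mutually exclusive
// depobj forms dispatched below; exactly one of depend/destroy/update appears
// on a given directive:
//   omp_depend_t d;
//   #pragma omp depobj(d) depend(inout: x)   // initialize
//   #pragma omp depobj(d) update(in)         // change the dependency kind
//   #pragma omp depobj(d) destroy            // release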
void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    // Build list and emit dependences
    OMPTaskDataTy Data;
    buildDependences(S, Data);
    for (auto &Dep : Data.Dependences) {
      Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
          *this, Dep, DC->getBeginLoc());
      EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
    }
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}

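// A sketch (hypothetical user code) of the inscan pattern handled by
// EmitOMPScanDirective below; the 'scan' separator splits the loop body into
// an input phase and a scan phase:
//   #pragma omp for reduction(inscan, +: x)
//   for (int i = 0; i < n; ++i) {
//     x += a[i];                      // <first part>
//     #pragma omp scan inclusive(x)
//     b[i] = x;                       // <second part>
//   }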
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    // }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(),
                      SrcLVal.getAddress(),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true,
           /*IsPrivateVarReduction*/ {}, OMPD_simd});
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(
            PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
            cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
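  // For the non-simd case the parent directive emits the loop twice: an input
  // phase that records per-iteration reduction values into a temporary buffer
  // and a scan phase that reads them back. Roughly (a sketch, assuming an
  // inclusive scan over 'x' with buffer 'buf'):
  //   for (i ...) { <first part>; buf[i] = x_priv; }  // OMPFirstScanLoop
  //   <prefix combine over buf>
  //   for (i ...) { x_priv = buf[i]; <second part> }  // second loop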
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(
          PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
          cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
          cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(
          PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
          cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
          cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

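      // An illustration (hypothetical) of the clause inspected below; per the
      // spec excerpt further down, 'static' is the only kind dist_schedule
      // accepts, optionally with a chunk size:
      //   #pragma omp distribute dist_schedule(static, 8)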
      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        // 1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunk one schedule generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}

// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
// function available for "loop bind(teams)", which maps to "distribute".
static void emitOMPDistributeDirective(const OMPLoopDirective &S,
                                       CodeGenFunction &CGF,
                                       CodeGenModule &CGM) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(CGF, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  emitOMPDistributeDirective(S, *this, CGM);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}

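// A sketch (hypothetical user code) of the doacross loop handled by the
// helpers below; each 'sink' dependence names an earlier iteration and
// 'source' signals completion of the current one:
//   #pragma omp for ordered(1)
//   for (int i = 1; i < n; ++i) {
//     #pragma omp ordered depend(sink: i - 1)   // or doacross(sink: i - 1)
//     a[i] = g(a[i - 1]);
//     #pragma omp ordered depend(source)        // or doacross(source:)
//   }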
template <typename T>
static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
                          llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                          llvm::OpenMPIRBuilder &OMPBuilder) {

  unsigned NumLoops = C->getNumLoops();
  QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
      /*DestWidth=*/64, /*Signed=*/1);
  llvm::SmallVector<llvm::Value *> StoreValues;
  for (unsigned I = 0; I < NumLoops; I++) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *StoreValue = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    StoreValues.emplace_back(StoreValue);
  }
  OMPDoacrossKind<T> ODK;
  bool IsDependSource = ODK.isSource(C);
  CGF.Builder.restoreIP(
      OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
                                     StoreValues, ".cnt.addr", IsDependSource));
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
        return llvm::Error::success();
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
        return llvm::Error::success();
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
      Builder.restoreIP(AfterIP);
    }
    return;
  }

  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

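// The conversion helpers below normalize values for '#pragma omp atomic'
// lowering. A hypothetical case where they matter, since 'v' and 'x' have
// different types:
//   double v; int x;
//   #pragma omp atomic read
//   v = x;   // the loaded int must be converted to double before the store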
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

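// A minimal example (hypothetical user code) of the form handled by
// emitOMPAtomicReadExpr below; with seq_cst, the atomic load is followed by
// an acquire flush, as encoded in the switch inside the function:
//   #pragma omp atomic read seq_cst
//   v = x;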
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

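// When the update expression matches a native read-modify-write operation,
// the function below emits a single 'atomicrmw' instead of a
// compare-and-swap loop. A hypothetical example:
//   #pragma omp atomic update
//   x += c;   // BO_Add -> atomicrmw add (fadd for floating-point x)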
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for
  // 'x' expression is simple and atomic is allowed for the given type for the
  // target platform.
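// Fallback used by EmitOMPAtomicSimpleUpdateExpr below when no 'atomicrmw'
// mapping exists: EmitAtomicUpdate produces, roughly (a sketch, not the exact
// emitted IR):
//   old = atomic load x
//   do { new = CommonGen(old); } while (!cmpxchg(x, old, new));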
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress().getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress().getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                         X.getAddress().getElementType());
  }
  llvm::AtomicRMWInst *Res =
      CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

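// Minimal examples (hypothetical user code) of the capture forms handled by
// the function below; IsPostfixUpdate selects whether 'v' receives the old or
// the new value of 'x':
//   #pragma omp atomic capture
//   v = x++;              // postfix: v gets the old value
//   #pragma omp atomic capture
//   { x += e; v = x; }    // v gets the new value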
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry
    // to the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the
    // atomic operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, {}, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}

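// Illustrations (hypothetical user code) of the compare forms lowered by the
// function below; the EQ form uses BO_EQ, while the ternary min/max forms map
// BO_LT/BO_GT to the MIN/MAX compare operations:
//   #pragma omp atomic compare
//   if (x == e) { x = d; }
//   #pragma omp atomic compare capture fail(acquire)
//   { v = x; if (x == e) { x = d; } }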
static void emitOMPAtomicCompareExpr(
    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
    SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress();

  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
    EVal = CGF.Builder.CreateIntCast(
        CI, XLVal.getAddress().getElementType(),
        E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
      DVal = CGF.Builder.CreateIntCast(
          CI, XLVal.getAddress().getElementType(),
          D->getType()->hasSignedIntegerRepresentation());

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.emitRawPointer(CGF), XAddr.getElementType(),
      X->getType()->hasSignedIntegerRepresentation(),
      X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(V);
    Address Addr = LV.getAddress();
    VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
              V->getType()->hasSignedIntegerRepresentation(),
              V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(R);
    Address Addr = LV.getAddress();
    ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
              R->getType()->hasSignedIntegerRepresentation(),
              R->getType().isVolatileQualified()};
  }

  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // fail clause was not mentioned on the
    // "#pragma omp atomic compare" construct.
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly));
  } else
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, FailAO));
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO,
                              llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *R,
                              const Expr *E, const Expr *UE, const Expr *D,
                              const Expr *CE, bool IsXLHSInRHSPart,
                              bool IsFailOnly, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_compare: {
    emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
                             IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
    break;
  }
  default:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

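// How the ordering selection below resolves in practice (hypothetical
// examples): with
//   #pragma omp atomic capture seq_cst
// AO becomes SequentiallyConsistent; with no ordering clause and
// 'requires atomic_default_mem_order(acq_rel)' in effect, a plain read gets
// Acquire and a plain write/update gets Release, per the
// !MemOrderingSpecified branch.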
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
  // Fail Memory Clause Ordering.
  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    // TBD
    if (K == OMPC_weak)
      return;
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
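// A minimal example (hypothetical user code) of what the helpers below lower;
// an 'if' clause that constant-folds to false, or an empty offload-target
// list, means the region is not registered as an offload entry:
//   #pragma omp target map(tofrom: a[0:n]) device(0) if(n > 1024)
//   { ... }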
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_fail)) {
    Kind = OMPC_compare;
    const auto *FailClause = S.getSingleClause<OMPFailClause>();
    if (FailClause) {
      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
      if (FailParameter == llvm::omp::OMPC_relaxed)
        FailAO = llvm::AtomicOrdering::Monotonic;
      else if (FailParameter == llvm::omp::OMPC_acquire)
        FailAO = llvm::AtomicOrdering::Acquire;
      else if (FailParameter == llvm::omp::OMPC_seq_cst)
        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
                    S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
                    S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
                    S.isFailOnly(), S.getBeginLoc());
}

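// A sketch (hypothetical user code) of the clauses forwarded to
// emitNumTeamsClause by the helper below:
//   #pragma omp teams num_teams(8) thread_limit(64)
//   { ... }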
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
    CGM.getDiags().Report(DiagID);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

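// Combined form handled below (hypothetical user code); the teams region is
// outlined once and wrapped in the usual target machinery:
//   #pragma omp target teams firstprivate(a) reduction(+: s)
//   { s += f(a); }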
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

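// Combined loop form handled below (hypothetical user code); the distribute
// part is emitted as an inlined directive inside the teams region:
//   #pragma omp target teams distribute reduction(+: s)
//   for (int i = 0; i < n; ++i) s += a[i];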
7049 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 7050 Action.Enter(CGF); 7051 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7052 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 7053 CGF.EmitOMPPrivateClause(S, PrivateScope); 7054 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7055 (void)PrivateScope.Privatize(); 7056 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 7057 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 7058 CGF.EmitStmt(CS->getCapturedStmt()); 7059 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7060 }; 7061 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 7062 emitPostUpdateForReductionClause(CGF, S, 7063 [](CodeGenFunction &) { return nullptr; }); 7064 } 7065 7066 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 7067 CodeGenModule &CGM, StringRef ParentName, 7068 const OMPTargetTeamsDirective &S) { 7069 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7070 emitTargetTeamsRegion(CGF, Action, S); 7071 }; 7072 llvm::Function *Fn; 7073 llvm::Constant *Addr; 7074 // Emit target region as a standalone region. 7075 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7076 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7077 assert(Fn && Addr && "Target device function emission failed."); 7078 } 7079 7080 void CodeGenFunction::EmitOMPTargetTeamsDirective( 7081 const OMPTargetTeamsDirective &S) { 7082 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7083 emitTargetTeamsRegion(CGF, Action, S); 7084 }; 7085 emitCommonOMPTargetDirective(*this, S, CodeGen); 7086 } 7087 7088 static void 7089 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 7090 const OMPTargetTeamsDistributeDirective &S) { 7091 Action.Enter(CGF); 7092 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7093 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7094 }; 7095 7096 // Emit teams region as a standalone region. 7097 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7098 PrePostActionTy &Action) { 7099 Action.Enter(CGF); 7100 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7101 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7102 (void)PrivateScope.Privatize(); 7103 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7104 CodeGenDistribute); 7105 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7106 }; 7107 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); 7108 emitPostUpdateForReductionClause(CGF, S, 7109 [](CodeGenFunction &) { return nullptr; }); 7110 } 7111 7112 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 7113 CodeGenModule &CGM, StringRef ParentName, 7114 const OMPTargetTeamsDistributeDirective &S) { 7115 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7116 emitTargetTeamsDistributeRegion(CGF, Action, S); 7117 }; 7118 llvm::Function *Fn; 7119 llvm::Constant *Addr; 7120 // Emit target region as a standalone region. 
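// Illustrative input for this device-side entry point:
//
//   #pragma omp target teams distribute
//   for (int i = 0; i < N; ++i) ...
//
// Only the outlined kernel is needed here; the host-side target call is
// emitted separately by EmitOMPTargetTeamsDistributeDirective.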
7121 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7122 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7123 assert(Fn && Addr && "Target device function emission failed."); 7124 } 7125 7126 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 7127 const OMPTargetTeamsDistributeDirective &S) { 7128 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7129 emitTargetTeamsDistributeRegion(CGF, Action, S); 7130 }; 7131 emitCommonOMPTargetDirective(*this, S, CodeGen); 7132 } 7133 7134 static void emitTargetTeamsDistributeSimdRegion( 7135 CodeGenFunction &CGF, PrePostActionTy &Action, 7136 const OMPTargetTeamsDistributeSimdDirective &S) { 7137 Action.Enter(CGF); 7138 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7139 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7140 }; 7141 7142 // Emit teams region as a standalone region. 7143 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7144 PrePostActionTy &Action) { 7145 Action.Enter(CGF); 7146 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7147 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7148 (void)PrivateScope.Privatize(); 7149 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7150 CodeGenDistribute); 7151 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7152 }; 7153 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); 7154 emitPostUpdateForReductionClause(CGF, S, 7155 [](CodeGenFunction &) { return nullptr; }); 7156 } 7157 7158 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 7159 CodeGenModule &CGM, StringRef ParentName, 7160 const OMPTargetTeamsDistributeSimdDirective &S) { 7161 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7162 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 7163 }; 7164 llvm::Function *Fn; 7165 llvm::Constant *Addr; 7166 // Emit target region as a standalone region. 7167 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7168 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7169 assert(Fn && Addr && "Target device function emission failed."); 7170 } 7171 7172 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 7173 const OMPTargetTeamsDistributeSimdDirective &S) { 7174 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7175 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 7176 }; 7177 emitCommonOMPTargetDirective(*this, S, CodeGen); 7178 } 7179 7180 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 7181 const OMPTeamsDistributeDirective &S) { 7182 7183 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7184 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7185 }; 7186 7187 // Emit teams region as a standalone region. 
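// A host-only construct such as (for illustration):
//
//   #pragma omp teams distribute reduction(+ : sum)
//   for (int i = 0; i < N; ++i) sum += a[i];
//
// is handled by nesting the 'distribute' loop emission
// (CodeGenDistribute above) inside the outlined teams region below.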
7188 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7189 PrePostActionTy &Action) { 7190 Action.Enter(CGF); 7191 OMPPrivateScope PrivateScope(CGF); 7192 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7193 (void)PrivateScope.Privatize(); 7194 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7195 CodeGenDistribute); 7196 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7197 }; 7198 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 7199 emitPostUpdateForReductionClause(*this, S, 7200 [](CodeGenFunction &) { return nullptr; }); 7201 } 7202 7203 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 7204 const OMPTeamsDistributeSimdDirective &S) { 7205 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7206 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7207 }; 7208 7209 // Emit teams region as a standalone region. 7210 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7211 PrePostActionTy &Action) { 7212 Action.Enter(CGF); 7213 OMPPrivateScope PrivateScope(CGF); 7214 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7215 (void)PrivateScope.Privatize(); 7216 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 7217 CodeGenDistribute); 7218 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7219 }; 7220 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); 7221 emitPostUpdateForReductionClause(*this, S, 7222 [](CodeGenFunction &) { return nullptr; }); 7223 } 7224 7225 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 7226 const OMPTeamsDistributeParallelForDirective &S) { 7227 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7228 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7229 S.getDistInc()); 7230 }; 7231 7232 // Emit teams region as a standalone region. 7233 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7234 PrePostActionTy &Action) { 7235 Action.Enter(CGF); 7236 OMPPrivateScope PrivateScope(CGF); 7237 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7238 (void)PrivateScope.Privatize(); 7239 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7240 CodeGenDistribute); 7241 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7242 }; 7243 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 7244 emitPostUpdateForReductionClause(*this, S, 7245 [](CodeGenFunction &) { return nullptr; }); 7246 } 7247 7248 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 7249 const OMPTeamsDistributeParallelForSimdDirective &S) { 7250 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7251 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7252 S.getDistInc()); 7253 }; 7254 7255 // Emit teams region as a standalone region. 
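// Here the inner worksharing loop is reached through
// emitInnerParallelForWhenCombined (see CodeGenDistribute above), so a
// construct like '#pragma omp teams distribute parallel for simd'
// effectively emits distribute chunks whose bodies re-enter
// parallel-for codegen.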
7256 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7257 PrePostActionTy &Action) { 7258 Action.Enter(CGF); 7259 OMPPrivateScope PrivateScope(CGF); 7260 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7261 (void)PrivateScope.Privatize(); 7262 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7263 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7264 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7265 }; 7266 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, 7267 CodeGen); 7268 emitPostUpdateForReductionClause(*this, S, 7269 [](CodeGenFunction &) { return nullptr; }); 7270 } 7271 7272 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { 7273 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 7274 llvm::Value *Device = nullptr; 7275 llvm::Value *NumDependences = nullptr; 7276 llvm::Value *DependenceList = nullptr; 7277 7278 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 7279 Device = EmitScalarExpr(C->getDevice()); 7280 7281 // Build list and emit dependences 7282 OMPTaskDataTy Data; 7283 buildDependences(S, Data); 7284 if (!Data.Dependences.empty()) { 7285 Address DependenciesArray = Address::invalid(); 7286 std::tie(NumDependences, DependenciesArray) = 7287 CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences, 7288 S.getBeginLoc()); 7289 DependenceList = DependenciesArray.emitRawPointer(*this); 7290 } 7291 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); 7292 7293 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() || 7294 S.getSingleClause<OMPDestroyClause>() || 7295 S.getSingleClause<OMPUseClause>())) && 7296 "OMPNowaitClause clause is used separately in OMPInteropDirective."); 7297 7298 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>(); 7299 if (!ItOMPInitClause.empty()) { 7300 // Look at the multiple init clauses 7301 for (const OMPInitClause *C : ItOMPInitClause) { 7302 llvm::Value *InteropvarPtr = 7303 EmitLValue(C->getInteropVar()).getPointer(*this); 7304 llvm::omp::OMPInteropType InteropType = 7305 llvm::omp::OMPInteropType::Unknown; 7306 if (C->getIsTarget()) { 7307 InteropType = llvm::omp::OMPInteropType::Target; 7308 } else { 7309 assert(C->getIsTargetSync() && 7310 "Expected interop-type target/targetsync"); 7311 InteropType = llvm::omp::OMPInteropType::TargetSync; 7312 } 7313 OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, 7314 Device, NumDependences, DependenceList, 7315 Data.HasNowaitClause); 7316 } 7317 } 7318 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>(); 7319 if (!ItOMPDestroyClause.empty()) { 7320 // Look at the multiple destroy clauses 7321 for (const OMPDestroyClause *C : ItOMPDestroyClause) { 7322 llvm::Value *InteropvarPtr = 7323 EmitLValue(C->getInteropVar()).getPointer(*this); 7324 OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, 7325 NumDependences, DependenceList, 7326 Data.HasNowaitClause); 7327 } 7328 } 7329 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>(); 7330 if (!ItOMPUseClause.empty()) { 7331 // Look at the multiple use clauses 7332 for (const OMPUseClause *C : ItOMPUseClause) { 7333 llvm::Value *InteropvarPtr = 7334 EmitLValue(C->getInteropVar()).getPointer(*this); 7335 OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, 7336 NumDependences, DependenceList, 7337 Data.HasNowaitClause); 7338 } 7339 } 7340 } 7341 7342 static void emitTargetTeamsDistributeParallelForRegion( 7343 CodeGenFunction &CGF, 
const OMPTargetTeamsDistributeParallelForDirective &S, 7344 PrePostActionTy &Action) { 7345 Action.Enter(CGF); 7346 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7347 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7348 S.getDistInc()); 7349 }; 7350 7351 // Emit teams region as a standalone region. 7352 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7353 PrePostActionTy &Action) { 7354 Action.Enter(CGF); 7355 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7356 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7357 (void)PrivateScope.Privatize(); 7358 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7359 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7360 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7361 }; 7362 7363 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, 7364 CodeGenTeams); 7365 emitPostUpdateForReductionClause(CGF, S, 7366 [](CodeGenFunction &) { return nullptr; }); 7367 } 7368 7369 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7370 CodeGenModule &CGM, StringRef ParentName, 7371 const OMPTargetTeamsDistributeParallelForDirective &S) { 7372 // Emit SPMD target teams distribute parallel for region as a standalone 7373 // region. 7374 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7375 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 7376 }; 7377 llvm::Function *Fn; 7378 llvm::Constant *Addr; 7379 // Emit target region as a standalone region. 7380 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7381 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7382 assert(Fn && Addr && "Target device function emission failed."); 7383 } 7384 7385 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 7386 const OMPTargetTeamsDistributeParallelForDirective &S) { 7387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7388 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 7389 }; 7390 emitCommonOMPTargetDirective(*this, S, CodeGen); 7391 } 7392 7393 static void emitTargetTeamsDistributeParallelForSimdRegion( 7394 CodeGenFunction &CGF, 7395 const OMPTargetTeamsDistributeParallelForSimdDirective &S, 7396 PrePostActionTy &Action) { 7397 Action.Enter(CGF); 7398 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7399 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7400 S.getDistInc()); 7401 }; 7402 7403 // Emit teams region as a standalone region. 7404 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7405 PrePostActionTy &Action) { 7406 Action.Enter(CGF); 7407 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7408 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7409 (void)PrivateScope.Privatize(); 7410 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7411 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7412 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7413 }; 7414 7415 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, 7416 CodeGenTeams); 7417 emitPostUpdateForReductionClause(CGF, S, 7418 [](CodeGenFunction &) { return nullptr; }); 7419 } 7420 7421 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 7422 CodeGenModule &CGM, StringRef ParentName, 7423 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 7424 // Emit SPMD target teams distribute parallel for simd region as a standalone 7425 // region. 
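// Illustrative shape of the construct handled here:
//
//   #pragma omp target teams distribute parallel for simd
//   for (int i = 0; i < N; ++i) a[i] = b[i] * c;
//
// Roughly speaking, in SPMD form all device threads enter the region
// together, which is why it can be emitted as a single standalone kernel.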
7426 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7427 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 7428 }; 7429 llvm::Function *Fn; 7430 llvm::Constant *Addr; 7431 // Emit target region as a standalone region. 7432 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7433 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7434 assert(Fn && Addr && "Target device function emission failed."); 7435 } 7436 7437 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 7438 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 7439 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7440 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 7441 }; 7442 emitCommonOMPTargetDirective(*this, S, CodeGen); 7443 } 7444 7445 void CodeGenFunction::EmitOMPCancellationPointDirective( 7446 const OMPCancellationPointDirective &S) { 7447 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), 7448 S.getCancelRegion()); 7449 } 7450 7451 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 7452 const Expr *IfCond = nullptr; 7453 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 7454 if (C->getNameModifier() == OMPD_unknown || 7455 C->getNameModifier() == OMPD_cancel) { 7456 IfCond = C->getCondition(); 7457 break; 7458 } 7459 } 7460 if (CGM.getLangOpts().OpenMPIRBuilder) { 7461 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 7462 // TODO: This check is necessary as we only generate `omp parallel` through 7463 // the OpenMPIRBuilder for now. 7464 if (S.getCancelRegion() == OMPD_parallel || 7465 S.getCancelRegion() == OMPD_sections || 7466 S.getCancelRegion() == OMPD_section) { 7467 llvm::Value *IfCondition = nullptr; 7468 if (IfCond) 7469 IfCondition = EmitScalarExpr(IfCond, 7470 /*IgnoreResultAssign=*/true); 7471 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 7472 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); 7473 return Builder.restoreIP(AfterIP); 7474 } 7475 } 7476 7477 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, 7478 S.getCancelRegion()); 7479 } 7480 7481 CodeGenFunction::JumpDest 7482 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 7483 if (Kind == OMPD_parallel || Kind == OMPD_task || 7484 Kind == OMPD_target_parallel || Kind == OMPD_taskloop || 7485 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) 7486 return ReturnBlock; 7487 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 7488 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 7489 Kind == OMPD_distribute_parallel_for || 7490 Kind == OMPD_target_parallel_for || 7491 Kind == OMPD_teams_distribute_parallel_for || 7492 Kind == OMPD_target_teams_distribute_parallel_for); 7493 return OMPCancelStack.getExitBlock(); 7494 } 7495 7496 void CodeGenFunction::EmitOMPUseDevicePtrClause( 7497 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, 7498 const llvm::DenseMap<const ValueDecl *, llvm::Value *> 7499 CaptureDeviceAddrMap) { 7500 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 7501 for (const Expr *OrigVarIt : C.varlist()) { 7502 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl()); 7503 if (!Processed.insert(OrigVD).second) 7504 continue; 7505 7506 // In order to identify the right initializer we need to match the 7507 // declaration used by the mapping logic. 
In some cases we may get
7508 // OMPCapturedExprDecl that refers to the original declaration.
7509 const ValueDecl *MatchingVD = OrigVD;
7510 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7511 // OMPCapturedExprDecls are used to privatize fields of the current
7512 // structure.
7513 const auto *ME = cast<MemberExpr>(OED->getInit());
7514 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7515 "Base should be the current struct!");
7516 MatchingVD = ME->getMemberDecl();
7517 }
7518
7519 // If we don't have information about the current list item, move on to
7520 // the next one.
7521 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7522 if (InitAddrIt == CaptureDeviceAddrMap.end())
7523 continue;
7524
7525 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7526
7527 // Register the device pointer as the address of the private variable.
7528 bool IsRegistered = PrivateScope.addPrivate(
7529 OrigVD,
7530 Address(InitAddrIt->second, Ty,
7531 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7532 assert(IsRegistered && "firstprivate var already registered as private");
7533 // Silence the warning about the unused variable.
7534 (void)IsRegistered;
7535 }
7536 }
7537
7538 static const VarDecl *getBaseDecl(const Expr *Ref) {
7539 const Expr *Base = Ref->IgnoreParenImpCasts();
7540 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
7541 Base = OASE->getBase()->IgnoreParenImpCasts();
7542 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7543 Base = ASE->getBase()->IgnoreParenImpCasts();
7544 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7545 }
7546
7547 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7548 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7549 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7550 CaptureDeviceAddrMap) {
7551 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7552 for (const Expr *Ref : C.varlist()) {
7553 const VarDecl *OrigVD = getBaseDecl(Ref);
7554 if (!Processed.insert(OrigVD).second)
7555 continue;
7556 // In order to identify the right initializer we need to match the
7557 // declaration used by the mapping logic. In some cases we may get
7558 // OMPCapturedExprDecl that refers to the original declaration.
7559 const ValueDecl *MatchingVD = OrigVD;
7560 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7561 // OMPCapturedExprDecls are used to privatize fields of the current
7562 // structure.
7563 const auto *ME = cast<MemberExpr>(OED->getInit());
7564 assert(isa<CXXThisExpr>(ME->getBase()) &&
7565 "Base should be the current struct!");
7566 MatchingVD = ME->getMemberDecl();
7567 }
7568
7569 // If we don't have information about the current list item, move on to
7570 // the next one.
7571 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7572 if (InitAddrIt == CaptureDeviceAddrMap.end())
7573 continue;
7574
7575 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7576
7577 Address PrivAddr =
7578 Address(InitAddrIt->second, Ty,
7579 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
7580 // For declrefs and variable length arrays we need to load the pointer for
7581 // correct mapping, since the pointer to the data was passed to the runtime.
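// Conceptually (a simplified sketch): for 'use_device_addr(p)' where p
// itself is a mapped pointer, the capture slot holds the location of p
// rather than the translated device pointer, so one extra load is
// required before the private copy can be registered.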
7582 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || 7583 MatchingVD->getType()->isArrayType()) { 7584 QualType PtrTy = getContext().getPointerType( 7585 OrigVD->getType().getNonReferenceType()); 7586 PrivAddr = 7587 EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)), 7588 PtrTy->castAs<PointerType>()); 7589 } 7590 7591 (void)PrivateScope.addPrivate(OrigVD, PrivAddr); 7592 } 7593 } 7594 7595 // Generate the instructions for '#pragma omp target data' directive. 7596 void CodeGenFunction::EmitOMPTargetDataDirective( 7597 const OMPTargetDataDirective &S) { 7598 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, 7599 /*SeparateBeginEndCalls=*/true); 7600 7601 // Create a pre/post action to signal the privatization of the device pointer. 7602 // This action can be replaced by the OpenMP runtime code generation to 7603 // deactivate privatization. 7604 bool PrivatizeDevicePointers = false; 7605 class DevicePointerPrivActionTy : public PrePostActionTy { 7606 bool &PrivatizeDevicePointers; 7607 7608 public: 7609 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 7610 : PrivatizeDevicePointers(PrivatizeDevicePointers) {} 7611 void Enter(CodeGenFunction &CGF) override { 7612 PrivatizeDevicePointers = true; 7613 } 7614 }; 7615 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 7616 7617 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { 7618 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7619 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 7620 }; 7621 7622 // Codegen that selects whether to generate the privatization code or not. 7623 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { 7624 RegionCodeGenTy RCG(InnermostCodeGen); 7625 PrivatizeDevicePointers = false; 7626 7627 // Call the pre-action to change the status of PrivatizeDevicePointers if 7628 // needed. 7629 Action.Enter(CGF); 7630 7631 if (PrivatizeDevicePointers) { 7632 OMPPrivateScope PrivateScope(CGF); 7633 // Emit all instances of the use_device_ptr clause. 7634 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 7635 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 7636 Info.CaptureDeviceAddrMap); 7637 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) 7638 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, 7639 Info.CaptureDeviceAddrMap); 7640 (void)PrivateScope.Privatize(); 7641 RCG(CGF); 7642 } else { 7643 // If we don't have target devices, don't bother emitting the data 7644 // mapping code. 7645 std::optional<OpenMPDirectiveKind> CaptureRegion; 7646 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 7647 // Emit helper decls of the use_device_ptr/use_device_addr clauses. 7648 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 7649 for (const Expr *E : C->varlist()) { 7650 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); 7651 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 7652 CGF.EmitVarDecl(*OED); 7653 } 7654 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) 7655 for (const Expr *E : C->varlist()) { 7656 const Decl *D = getBaseDecl(E); 7657 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 7658 CGF.EmitVarDecl(*OED); 7659 } 7660 } else { 7661 CaptureRegion = OMPD_unknown; 7662 } 7663 7664 OMPLexicalScope Scope(CGF, S, CaptureRegion); 7665 RCG(CGF); 7666 } 7667 }; 7668 7669 // Forward the provided action to the privatization codegen. 
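// For reference, the overall expansion sketched by this function is
// roughly (assuming a device is configured):
//
//   <target data begin runtime call>   // map clauses
//   <body, with use_device_ptr/use_device_addr privatized>
//   <target data end runtime call>     // unmap
//
// with the begin/end pair requested via SeparateBeginEndCalls above.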
7670 RegionCodeGenTy PrivRCG(PrivCodeGen);
7671 PrivRCG.setAction(Action);
7672
7673 // Although the body of the region is emitted as an inlined directive, we
7674 // don't use an inlined scope, because changes to the references inside the
7675 // region are expected to be visible outside, so we do not privatize them.
7676 OMPLexicalScope Scope(CGF, S);
7677 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
7678 PrivRCG);
7679 };
7680
7681 RegionCodeGenTy RCG(CodeGen);
7682
7683 // If we don't have target devices, don't bother emitting the data mapping
7684 // code.
7685 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7686 RCG(*this);
7687 return;
7688 }
7689
7690 // Check if we have any if clause associated with the directive.
7691 const Expr *IfCond = nullptr;
7692 if (const auto *C = S.getSingleClause<OMPIfClause>())
7693 IfCond = C->getCondition();
7694
7695 // Check if we have any device clause associated with the directive.
7696 const Expr *Device = nullptr;
7697 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7698 Device = C->getDevice();
7699
7700 // Set the action to signal privatization of device pointers.
7701 RCG.setAction(PrivAction);
7702
7703 // Emit region code.
7704 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
7705 Info);
7706 }
7707
7708 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7709 const OMPTargetEnterDataDirective &S) {
7710 // If we don't have target devices, don't bother emitting the data mapping
7711 // code.
7712 if (CGM.getLangOpts().OMPTargetTriples.empty())
7713 return;
7714
7715 // Check if we have any if clause associated with the directive.
7716 const Expr *IfCond = nullptr;
7717 if (const auto *C = S.getSingleClause<OMPIfClause>())
7718 IfCond = C->getCondition();
7719
7720 // Check if we have any device clause associated with the directive.
7721 const Expr *Device = nullptr;
7722 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7723 Device = C->getDevice();
7724
7725 OMPLexicalScope Scope(*this, S, OMPD_task);
7726 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7727 }
7728
7729 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7730 const OMPTargetExitDataDirective &S) {
7731 // If we don't have target devices, don't bother emitting the data mapping
7732 // code.
7733 if (CGM.getLangOpts().OMPTargetTriples.empty())
7734 return;
7735
7736 // Check if we have any if clause associated with the directive.
7737 const Expr *IfCond = nullptr;
7738 if (const auto *C = S.getSingleClause<OMPIfClause>())
7739 IfCond = C->getCondition();
7740
7741 // Check if we have any device clause associated with the directive.
7742 const Expr *Device = nullptr;
7743 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7744 Device = C->getDevice();
7745
7746 OMPLexicalScope Scope(*this, S, OMPD_task);
7747 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7748 }
7749
7750 static void emitTargetParallelRegion(CodeGenFunction &CGF,
7751 const OMPTargetParallelDirective &S,
7752 PrePostActionTy &Action) {
7753 // Get the captured statement associated with the 'parallel' region.
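// (Combined 'target parallel' directives carry one captured statement
// per constituent region; OMPD_parallel selects the inner one here.)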
7754 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 7755 Action.Enter(CGF); 7756 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 7757 Action.Enter(CGF); 7758 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7759 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 7760 CGF.EmitOMPPrivateClause(S, PrivateScope); 7761 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7762 (void)PrivateScope.Privatize(); 7763 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 7764 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 7765 // TODO: Add support for clauses. 7766 CGF.EmitStmt(CS->getCapturedStmt()); 7767 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 7768 }; 7769 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 7770 emitEmptyBoundParameters); 7771 emitPostUpdateForReductionClause(CGF, S, 7772 [](CodeGenFunction &) { return nullptr; }); 7773 } 7774 7775 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 7776 CodeGenModule &CGM, StringRef ParentName, 7777 const OMPTargetParallelDirective &S) { 7778 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7779 emitTargetParallelRegion(CGF, S, Action); 7780 }; 7781 llvm::Function *Fn; 7782 llvm::Constant *Addr; 7783 // Emit target region as a standalone region. 7784 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7785 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7786 assert(Fn && Addr && "Target device function emission failed."); 7787 } 7788 7789 void CodeGenFunction::EmitOMPTargetParallelDirective( 7790 const OMPTargetParallelDirective &S) { 7791 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7792 emitTargetParallelRegion(CGF, S, Action); 7793 }; 7794 emitCommonOMPTargetDirective(*this, S, CodeGen); 7795 } 7796 7797 static void emitTargetParallelForRegion(CodeGenFunction &CGF, 7798 const OMPTargetParallelForDirective &S, 7799 PrePostActionTy &Action) { 7800 Action.Enter(CGF); 7801 // Emit directive as a combined directive that consists of two implicit 7802 // directives: 'parallel' with 'for' directive. 7803 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7804 Action.Enter(CGF); 7805 CodeGenFunction::OMPCancelStackRAII CancelRegion( 7806 CGF, OMPD_target_parallel_for, S.hasCancel()); 7807 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 7808 emitDispatchForLoopBounds); 7809 }; 7810 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 7811 emitEmptyBoundParameters); 7812 } 7813 7814 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 7815 CodeGenModule &CGM, StringRef ParentName, 7816 const OMPTargetParallelForDirective &S) { 7817 // Emit SPMD target parallel for region as a standalone region. 7818 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7819 emitTargetParallelForRegion(CGF, S, Action); 7820 }; 7821 llvm::Function *Fn; 7822 llvm::Constant *Addr; 7823 // Emit target region as a standalone region. 
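// Illustrative example for this entry point:
//
//   #pragma omp target parallel for
//   for (int i = 0; i < N; ++i) a[i] = i;
//
// Note the OMPCancelStackRAII in the region codegen above: it gives a
// '#pragma omp cancel for' inside the loop a well-defined exit path.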
7824 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7825 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7826 assert(Fn && Addr && "Target device function emission failed."); 7827 } 7828 7829 void CodeGenFunction::EmitOMPTargetParallelForDirective( 7830 const OMPTargetParallelForDirective &S) { 7831 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7832 emitTargetParallelForRegion(CGF, S, Action); 7833 }; 7834 emitCommonOMPTargetDirective(*this, S, CodeGen); 7835 } 7836 7837 static void 7838 emitTargetParallelForSimdRegion(CodeGenFunction &CGF, 7839 const OMPTargetParallelForSimdDirective &S, 7840 PrePostActionTy &Action) { 7841 Action.Enter(CGF); 7842 // Emit directive as a combined directive that consists of two implicit 7843 // directives: 'parallel' with 'for' directive. 7844 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7845 Action.Enter(CGF); 7846 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 7847 emitDispatchForLoopBounds); 7848 }; 7849 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, 7850 emitEmptyBoundParameters); 7851 } 7852 7853 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 7854 CodeGenModule &CGM, StringRef ParentName, 7855 const OMPTargetParallelForSimdDirective &S) { 7856 // Emit SPMD target parallel for region as a standalone region. 7857 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7858 emitTargetParallelForSimdRegion(CGF, S, Action); 7859 }; 7860 llvm::Function *Fn; 7861 llvm::Constant *Addr; 7862 // Emit target region as a standalone region. 7863 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7864 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7865 assert(Fn && Addr && "Target device function emission failed."); 7866 } 7867 7868 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 7869 const OMPTargetParallelForSimdDirective &S) { 7870 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7871 emitTargetParallelForSimdRegion(CGF, S, Action); 7872 }; 7873 emitCommonOMPTargetDirective(*this, S, CodeGen); 7874 } 7875 7876 /// Emit a helper variable and return corresponding lvalue. 7877 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 7878 const ImplicitParamDecl *PVD, 7879 CodeGenFunction::OMPPrivateScope &Privates) { 7880 const auto *VDecl = cast<VarDecl>(Helper->getDecl()); 7881 Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD)); 7882 } 7883 7884 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 7885 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 7886 // Emit outlined function for task construct. 7887 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); 7888 Address CapturedStruct = Address::invalid(); 7889 { 7890 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); 7891 CapturedStruct = GenerateCapturedStmtArgument(*CS); 7892 } 7893 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 7894 const Expr *IfCond = nullptr; 7895 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 7896 if (C->getNameModifier() == OMPD_unknown || 7897 C->getNameModifier() == OMPD_taskloop) { 7898 IfCond = C->getCondition(); 7899 break; 7900 } 7901 } 7902 7903 OMPTaskDataTy Data; 7904 // Check if taskloop must be emitted without taskgroup. 7905 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 7906 // TODO: Check if we should emit tied or untied task. 
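// Scheduling is encoded below as a (pointer, int) pair: the pointer is
// the evaluated grainsize/num_tasks expression and the flag records
// which clause was seen (false = grainsize, true = num_tasks), with
// HasModifier tracking the 'strict' modifier.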
7907 Data.Tied = true;
7908 // Set scheduling for taskloop.
7909 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7910 // grainsize clause
7911 Data.Schedule.setInt(/*IntVal=*/false);
7912 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7913 Data.HasModifier =
7914 Clause->getModifier() == OMPC_GRAINSIZE_strict;
7915 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7916 // num_tasks clause
7917 Data.Schedule.setInt(/*IntVal=*/true);
7918 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7919 Data.HasModifier =
7920 Clause->getModifier() == OMPC_NUMTASKS_strict;
7921 }
7922
7923 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7924 // if (PreCond) {
7925 // for (IV in 0..LastIteration) BODY;
7926 // <Final counter/linear vars updates>;
7927 // }
7928 //
7929
7930 // Emit: if (PreCond) - begin.
7931 // If the condition constant folds and can be elided, avoid emitting the
7932 // whole loop.
7933 bool CondConstant;
7934 llvm::BasicBlock *ContBlock = nullptr;
7935 OMPLoopScope PreInitScope(CGF, S);
7936 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7937 if (!CondConstant)
7938 return;
7939 } else {
7940 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7941 ContBlock = CGF.createBasicBlock("taskloop.if.end");
7942 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7943 CGF.getProfileCount(&S));
7944 CGF.EmitBlock(ThenBlock);
7945 CGF.incrementProfileCounter(&S);
7946 }
7947
7948 (void)CGF.EmitOMPLinearClauseInit(S);
7949
7950 OMPPrivateScope LoopScope(CGF);
7951 // Emit helper vars inits.
7952 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7953 auto *I = CS->getCapturedDecl()->param_begin();
7954 auto *LBP = std::next(I, LowerBound);
7955 auto *UBP = std::next(I, UpperBound);
7956 auto *STP = std::next(I, Stride);
7957 auto *LIP = std::next(I, LastIter);
7958 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7959 LoopScope);
7960 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7961 LoopScope);
7962 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7963 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7964 LoopScope);
7965 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7966 CGF.EmitOMPLinearClause(S, LoopScope);
7967 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7968 (void)LoopScope.Privatize();
7969 // Emit the loop iteration variable.
7970 const Expr *IVExpr = S.getIterationVariable();
7971 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7972 CGF.EmitVarDecl(*IVDecl);
7973 CGF.EmitIgnoredExpr(S.getInit());
7974
7975 // Emit the iterations count variable.
7976 // If it is not a variable, Sema decided to calculate the iterations count
7977 // on each iteration (e.g., it is foldable into a constant).
7978 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7979 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7980 // Emit calculation of the iterations count.
7981 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 7982 } 7983 7984 { 7985 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); 7986 emitCommonSimdLoop( 7987 CGF, S, 7988 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7989 if (isOpenMPSimdDirective(S.getDirectiveKind())) 7990 CGF.EmitOMPSimdInit(S); 7991 }, 7992 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 7993 CGF.EmitOMPInnerLoop( 7994 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 7995 [&S](CodeGenFunction &CGF) { 7996 emitOMPLoopBodyWithStopPoint(CGF, S, 7997 CodeGenFunction::JumpDest()); 7998 }, 7999 [](CodeGenFunction &) {}); 8000 }); 8001 } 8002 // Emit: if (PreCond) - end. 8003 if (ContBlock) { 8004 CGF.EmitBranch(ContBlock); 8005 CGF.EmitBlock(ContBlock, true); 8006 } 8007 // Emit final copy of the lastprivate variables if IsLastIter != 0. 8008 if (HasLastprivateClause) { 8009 CGF.EmitOMPLastprivateClauseFinal( 8010 S, isOpenMPSimdDirective(S.getDirectiveKind()), 8011 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 8012 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 8013 (*LIP)->getType(), S.getBeginLoc()))); 8014 } 8015 LoopScope.restoreMap(); 8016 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { 8017 return CGF.Builder.CreateIsNotNull( 8018 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 8019 (*LIP)->getType(), S.getBeginLoc())); 8020 }); 8021 }; 8022 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 8023 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 8024 const OMPTaskDataTy &Data) { 8025 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, 8026 &Data](CodeGenFunction &CGF, PrePostActionTy &) { 8027 OMPLoopScope PreInitScope(CGF, S); 8028 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, 8029 OutlinedFn, SharedsTy, 8030 CapturedStruct, IfCond, Data); 8031 }; 8032 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 8033 CodeGen); 8034 }; 8035 if (Data.Nogroup) { 8036 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); 8037 } else { 8038 CGM.getOpenMPRuntime().emitTaskgroupRegion( 8039 *this, 8040 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 8041 PrePostActionTy &Action) { 8042 Action.Enter(CGF); 8043 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, 8044 Data); 8045 }, 8046 S.getBeginLoc()); 8047 } 8048 } 8049 8050 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 8051 auto LPCRegion = 8052 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8053 EmitOMPTaskLoopBasedDirective(S); 8054 } 8055 8056 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 8057 const OMPTaskLoopSimdDirective &S) { 8058 auto LPCRegion = 8059 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8060 OMPLexicalScope Scope(*this, S); 8061 EmitOMPTaskLoopBasedDirective(S); 8062 } 8063 8064 void CodeGenFunction::EmitOMPMasterTaskLoopDirective( 8065 const OMPMasterTaskLoopDirective &S) { 8066 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8067 Action.Enter(CGF); 8068 EmitOMPTaskLoopBasedDirective(S); 8069 }; 8070 auto LPCRegion = 8071 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8072 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); 8073 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 8074 } 8075 8076 void CodeGenFunction::EmitOMPMaskedTaskLoopDirective( 8077 const OMPMaskedTaskLoopDirective &S) { 8078 auto &&CodeGen = 
[this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8079 Action.Enter(CGF); 8080 EmitOMPTaskLoopBasedDirective(S); 8081 }; 8082 auto LPCRegion = 8083 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8084 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); 8085 CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc()); 8086 } 8087 8088 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( 8089 const OMPMasterTaskLoopSimdDirective &S) { 8090 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8091 Action.Enter(CGF); 8092 EmitOMPTaskLoopBasedDirective(S); 8093 }; 8094 auto LPCRegion = 8095 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8096 OMPLexicalScope Scope(*this, S); 8097 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 8098 } 8099 8100 void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective( 8101 const OMPMaskedTaskLoopSimdDirective &S) { 8102 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8103 Action.Enter(CGF); 8104 EmitOMPTaskLoopBasedDirective(S); 8105 }; 8106 auto LPCRegion = 8107 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8108 OMPLexicalScope Scope(*this, S); 8109 CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc()); 8110 } 8111 8112 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( 8113 const OMPParallelMasterTaskLoopDirective &S) { 8114 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8115 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 8116 PrePostActionTy &Action) { 8117 Action.Enter(CGF); 8118 CGF.EmitOMPTaskLoopBasedDirective(S); 8119 }; 8120 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 8121 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 8122 S.getBeginLoc()); 8123 }; 8124 auto LPCRegion = 8125 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8126 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, 8127 emitEmptyBoundParameters); 8128 } 8129 8130 void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective( 8131 const OMPParallelMaskedTaskLoopDirective &S) { 8132 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8133 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 8134 PrePostActionTy &Action) { 8135 Action.Enter(CGF); 8136 CGF.EmitOMPTaskLoopBasedDirective(S); 8137 }; 8138 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 8139 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen, 8140 S.getBeginLoc()); 8141 }; 8142 auto LPCRegion = 8143 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8144 emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen, 8145 emitEmptyBoundParameters); 8146 } 8147 8148 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( 8149 const OMPParallelMasterTaskLoopSimdDirective &S) { 8150 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8151 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 8152 PrePostActionTy &Action) { 8153 Action.Enter(CGF); 8154 CGF.EmitOMPTaskLoopBasedDirective(S); 8155 }; 8156 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 8157 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 8158 S.getBeginLoc()); 8159 }; 8160 auto LPCRegion = 8161 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8162 emitCommonOMPParallelDirective(*this, S, 
OMPD_master_taskloop_simd, CodeGen,
8163 emitEmptyBoundParameters);
8164 }
8165
8166 void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
8167 const OMPParallelMaskedTaskLoopSimdDirective &S) {
8168 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8169 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8170 PrePostActionTy &Action) {
8171 Action.Enter(CGF);
8172 CGF.EmitOMPTaskLoopBasedDirective(S);
8173 };
8174 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8175 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
8176 S.getBeginLoc());
8177 };
8178 auto LPCRegion =
8179 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
8180 emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop_simd, CodeGen,
8181 emitEmptyBoundParameters);
8182 }
8183
8184 // Generate the instructions for '#pragma omp target update' directive.
8185 void CodeGenFunction::EmitOMPTargetUpdateDirective(
8186 const OMPTargetUpdateDirective &S) {
8187 // If we don't have target devices, don't bother emitting the data mapping
8188 // code.
8189 if (CGM.getLangOpts().OMPTargetTriples.empty())
8190 return;
8191
8192 // Check if we have any if clause associated with the directive.
8193 const Expr *IfCond = nullptr;
8194 if (const auto *C = S.getSingleClause<OMPIfClause>())
8195 IfCond = C->getCondition();
8196
8197 // Check if we have any device clause associated with the directive.
8198 const Expr *Device = nullptr;
8199 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8200 Device = C->getDevice();
8201
8202 OMPLexicalScope Scope(*this, S, OMPD_task);
8203 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
8204 }
8205
8206 void CodeGenFunction::EmitOMPGenericLoopDirective(
8207 const OMPGenericLoopDirective &S) {
8208 // Always expect a bind clause on the loop directive. If it wasn't
8209 // in the source, it should have been added in Sema.
8210
8211 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
8212 if (const auto *C = S.getSingleClause<OMPBindClause>())
8213 BindKind = C->getBindKind();
8214
8215 switch (BindKind) {
8216 case OMPC_BIND_parallel: // for
8217 return emitOMPForDirective(S, *this, CGM, /*HasCancel=*/false);
8218 case OMPC_BIND_teams: // distribute
8219 return emitOMPDistributeDirective(S, *this, CGM);
8220 case OMPC_BIND_thread: // simd
8221 return emitOMPSimdDirective(S, *this, CGM);
8222 case OMPC_BIND_unknown:
8223 break;
8224 }
8225
8226 // Unimplemented, just inline the underlying statement for now.
8227 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8228 // Emit the loop iteration variable.
8229 const Stmt *CS =
8230 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
8231 const auto *ForS = dyn_cast<ForStmt>(CS);
8232 if (ForS && !isa<DeclStmt>(ForS->getInit())) {
8233 OMPPrivateScope LoopScope(CGF);
8234 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8235 (void)LoopScope.Privatize();
8236 CGF.EmitStmt(CS);
8237 LoopScope.restoreMap();
8238 } else {
8239 CGF.EmitStmt(CS);
8240 }
8241 };
8242 OMPLexicalScope Scope(*this, S, OMPD_unknown);
8243 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
8244 }
8245
8246 void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
8247 const OMPLoopDirective &S) {
8248 // Emit combined directive as if its constituent constructs are 'parallel'
8249 // and 'for'.
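// That is, an illustrative expansion of 'parallel loop' here is:
//
//   #pragma omp parallel
//   #pragma omp for
//   for (...) ...
//
// with copyin handled first in the region codegen below.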
8250 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8251 Action.Enter(CGF);
8252 emitOMPCopyinClause(CGF, S);
8253 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
8254 };
8255 {
8256 auto LPCRegion =
8257 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
8258 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
8259 emitEmptyBoundParameters);
8260 }
8261 // Check for outer lastprivate conditional update.
8262 checkForLastprivateConditionalUpdate(*this, S);
8263 }
8264
8265 void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
8266 const OMPTeamsGenericLoopDirective &S) {
8267 // To be consistent with current behavior of 'target teams loop', emit
8268 // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
8269 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8270 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
8271 };
8272
8273 // Emit teams region as a standalone region.
8274 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8275 PrePostActionTy &Action) {
8276 Action.Enter(CGF);
8277 OMPPrivateScope PrivateScope(CGF);
8278 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8279 (void)PrivateScope.Privatize();
8280 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
8281 CodeGenDistribute);
8282 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8283 };
8284 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
8285 emitPostUpdateForReductionClause(*this, S,
8286 [](CodeGenFunction &) { return nullptr; });
8287 }
8288
8289 #ifndef NDEBUG
8290 static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
8291 std::string StatusMsg,
8292 const OMPExecutableDirective &D) {
8293 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8294 if (IsDevice)
8295 StatusMsg += ": DEVICE";
8296 else
8297 StatusMsg += ": HOST";
8298 SourceLocation L = D.getBeginLoc();
8299 auto &SM = CGF.getContext().getSourceManager();
8300 PresumedLoc PLoc = SM.getPresumedLoc(L);
8301 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8302 unsigned LineNo =
8303 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8304 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8305 }
8306 #endif
8307
8308 static void emitTargetTeamsGenericLoopRegionAsParallel(
8309 CodeGenFunction &CGF, PrePostActionTy &Action,
8310 const OMPTargetTeamsGenericLoopDirective &S) {
8311 Action.Enter(CGF);
8312 // Emit 'teams loop' as if its constituent constructs are 'distribute',
8313 // 'parallel', and 'for'.
8314 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8315 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
8316 S.getDistInc());
8317 };
8318
8319 // Emit teams region as a standalone region.
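// When S.canBeParallelFor() holds, 'target teams loop' is effectively
// lowered as '#pragma omp target teams distribute parallel for' (see
// the OMPD_distribute_parallel_for kind passed below); the
// TTL_CODEGEN_TYPE debug output records which strategy was chosen.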
8320 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8321 PrePostActionTy &Action) {
8322 Action.Enter(CGF);
8323 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8324 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8325 (void)PrivateScope.Privatize();
8326 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8327 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
8328 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8329 };
8330 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8331 emitTargetTeamsLoopCodegenStatus(
8332 CGF, TTL_CODEGEN_TYPE " as parallel for", S));
8333 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
8334 CodeGenTeams);
8335 emitPostUpdateForReductionClause(CGF, S,
8336 [](CodeGenFunction &) { return nullptr; });
8337 }
8338
8339 static void emitTargetTeamsGenericLoopRegionAsDistribute(
8340 CodeGenFunction &CGF, PrePostActionTy &Action,
8341 const OMPTargetTeamsGenericLoopDirective &S) {
8342 Action.Enter(CGF);
8343 // Emit 'teams loop' as if its constituent construct is 'distribute'.
8344 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8345 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
8346 };
8347
8348 // Emit teams region as a standalone region.
8349 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8350 PrePostActionTy &Action) {
8351 Action.Enter(CGF);
8352 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8353 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8354 (void)PrivateScope.Privatize();
8355 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8356 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
8357 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8358 };
8359 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8360 emitTargetTeamsLoopCodegenStatus(
8361 CGF, TTL_CODEGEN_TYPE " as distribute", S));
8362 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
8363 emitPostUpdateForReductionClause(CGF, S,
8364 [](CodeGenFunction &) { return nullptr; });
8365 }
8366
8367 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8368 const OMPTargetTeamsGenericLoopDirective &S) {
8369 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8370 if (S.canBeParallelFor())
8371 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8372 else
8373 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8374 };
8375 emitCommonOMPTargetDirective(*this, S, CodeGen);
8376 }
8377
8378 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
8379 CodeGenModule &CGM, StringRef ParentName,
8380 const OMPTargetTeamsGenericLoopDirective &S) {
8381 // Emit SPMD target teams loop region as a standalone region.
8382 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8383 if (S.canBeParallelFor())
8384 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8385 else
8386 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8387 };
8388 llvm::Function *Fn;
8389 llvm::Constant *Addr;
8390 // Emit target region as a standalone region.
8391 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 8392 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 8393 assert(Fn && Addr && 8394 "Target device function emission failed for 'target teams loop'."); 8395 } 8396 8397 static void emitTargetParallelGenericLoopRegion( 8398 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, 8399 PrePostActionTy &Action) { 8400 Action.Enter(CGF); 8401 // Emit as 'parallel for'. 8402 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8403 Action.Enter(CGF); 8404 CodeGenFunction::OMPCancelStackRAII CancelRegion( 8405 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); 8406 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 8407 emitDispatchForLoopBounds); 8408 }; 8409 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 8410 emitEmptyBoundParameters); 8411 } 8412 8413 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 8414 CodeGenModule &CGM, StringRef ParentName, 8415 const OMPTargetParallelGenericLoopDirective &S) { 8416 // Emit target parallel loop region as a standalone region. 8417 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8418 emitTargetParallelGenericLoopRegion(CGF, S, Action); 8419 }; 8420 llvm::Function *Fn; 8421 llvm::Constant *Addr; 8422 // Emit target region as a standalone region. 8423 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 8424 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 8425 assert(Fn && Addr && "Target device function emission failed."); 8426 } 8427 8428 /// Emit combined directive 'target parallel loop' as if its constituent 8429 /// constructs are 'target', 'parallel', and 'for'. 8430 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( 8431 const OMPTargetParallelGenericLoopDirective &S) { 8432 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8433 emitTargetParallelGenericLoopRegion(CGF, S, Action); 8434 }; 8435 emitCommonOMPTargetDirective(*this, S, CodeGen); 8436 } 8437 8438 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 8439 const OMPExecutableDirective &D) { 8440 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { 8441 EmitOMPScanDirective(*SD); 8442 return; 8443 } 8444 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 8445 return; 8446 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 8447 OMPPrivateScope GlobalsScope(CGF); 8448 if (isOpenMPTaskingDirective(D.getDirectiveKind())) { 8449 // Capture global firstprivates to avoid crash. 
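// (E.g., for a file-scope 'int g;' used in
// '#pragma omp taskloop firstprivate(g)', the loop below registers g's
// global address so later privatization lookups succeed; an
// illustrative case, not an exhaustive one.)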
8450 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8451 for (const Expr *Ref : C->varlist()) {
8452 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8453 if (!DRE)
8454 continue;
8455 const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8456 if (!VD || VD->hasLocalStorage())
8457 continue;
8458 if (!CGF.LocalDeclMap.count(VD)) {
8459 LValue GlobLVal = CGF.EmitLValue(Ref);
8460 GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
8461 }
8462 }
8463 }
8464 }
8465 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8466 (void)GlobalsScope.Privatize();
8467 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8468 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
8469 } else {
8470 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
8471 for (const Expr *E : LD->counters()) {
8472 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8473 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8474 LValue GlobLVal = CGF.EmitLValue(E);
8475 GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
8476 }
8477 if (isa<OMPCapturedExprDecl>(VD)) {
8478 // Emit only those that were not explicitly referenced in clauses.
8479 if (!CGF.LocalDeclMap.count(VD))
8480 CGF.EmitVarDecl(*VD);
8481 }
8482 }
8483 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8484 if (!C->getNumForLoops())
8485 continue;
8486 for (unsigned I = LD->getLoopsNumber(),
8487 E = C->getLoopNumIterations().size();
8488 I < E; ++I) {
8489 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8490 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8491 // Emit only those that were not explicitly referenced in clauses.
8492 if (!CGF.LocalDeclMap.count(VD))
8493 CGF.EmitVarDecl(*VD);
8494 }
8495 }
8496 }
8497 }
8498 (void)GlobalsScope.Privatize();
8499 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8500 }
8501 };
8502 if (D.getDirectiveKind() == OMPD_atomic ||
8503 D.getDirectiveKind() == OMPD_critical ||
8504 D.getDirectiveKind() == OMPD_section ||
8505 D.getDirectiveKind() == OMPD_master ||
8506 D.getDirectiveKind() == OMPD_masked ||
8507 D.getDirectiveKind() == OMPD_unroll ||
8508 D.getDirectiveKind() == OMPD_assume) {
8509 EmitStmt(D.getAssociatedStmt());
8510 } else {
8511 auto LPCRegion =
8512 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
8513 OMPSimdLexicalScope Scope(*this, D);
8514 CGM.getOpenMPRuntime().emitInlinedDirective(
8515 *this,
8516 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8517 : D.getDirectiveKind(),
8518 CodeGen);
8519 }
8520 // Check for outer lastprivate conditional update.
8521 checkForLastprivateConditionalUpdate(*this, D);
8522 }
8523
8524 void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
8525 EmitStmt(S.getAssociatedStmt());
8526 }
8527
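// 'omp assume' only conveys optimization hints; e.g. (illustrative):
//
//   #pragma omp assume no_parallelism
//   { work(); }
//
// emits just the associated statement, as above.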