1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the OpenMPIRBuilder class, which is used as a 11 /// convenient way to create LLVM instructions for OpenMP directives. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 16 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/Triple.h" 19 #include "llvm/IR/CFG.h" 20 #include "llvm/IR/DebugInfo.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/MDBuilder.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 #include "llvm/Transforms/Utils/CodeExtractor.h" 28 29 #include <sstream> 30 31 #define DEBUG_TYPE "openmp-ir-builder" 32 33 using namespace llvm; 34 using namespace omp; 35 36 static cl::opt<bool> 37 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, 38 cl::desc("Use optimistic attributes describing " 39 "'as-if' properties of runtime calls."), 40 cl::init(false)); 41 42 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { 43 LLVMContext &Ctx = Fn.getContext(); 44 45 // Get the function's current attributes. 
46 auto Attrs = Fn.getAttributes(); 47 auto FnAttrs = Attrs.getFnAttributes(); 48 auto RetAttrs = Attrs.getRetAttributes(); 49 SmallVector<AttributeSet, 4> ArgAttrs; 50 for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo) 51 ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo)); 52 53 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet; 54 #include "llvm/Frontend/OpenMP/OMPKinds.def" 55 56 // Add attributes to the function declaration. 57 switch (FnID) { 58 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \ 59 case Enum: \ 60 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \ 61 RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \ 62 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \ 63 ArgAttrs[ArgNo] = \ 64 ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \ 65 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \ 66 break; 67 #include "llvm/Frontend/OpenMP/OMPKinds.def" 68 default: 69 // Attributes are optional. 70 break; 71 } 72 } 73 74 FunctionCallee 75 OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { 76 FunctionType *FnTy = nullptr; 77 Function *Fn = nullptr; 78 79 // Try to find the declation in the module first. 80 switch (FnID) { 81 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ 82 case Enum: \ 83 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \ 84 IsVarArg); \ 85 Fn = M.getFunction(Str); \ 86 break; 87 #include "llvm/Frontend/OpenMP/OMPKinds.def" 88 } 89 90 if (!Fn) { 91 // Create a new declaration if we need one. 92 switch (FnID) { 93 #define OMP_RTL(Enum, Str, ...) 
\ 94 case Enum: \ 95 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \ 96 break; 97 #include "llvm/Frontend/OpenMP/OMPKinds.def" 98 } 99 100 // Add information if the runtime function takes a callback function 101 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) { 102 if (!Fn->hasMetadata(LLVMContext::MD_callback)) { 103 LLVMContext &Ctx = Fn->getContext(); 104 MDBuilder MDB(Ctx); 105 // Annotate the callback behavior of the runtime function: 106 // - The callback callee is argument number 2 (microtask). 107 // - The first two arguments of the callback callee are unknown (-1). 108 // - All variadic arguments to the runtime function are passed to the 109 // callback callee. 110 Fn->addMetadata( 111 LLVMContext::MD_callback, 112 *MDNode::get(Ctx, {MDB.createCallbackEncoding( 113 2, {-1, -1}, /* VarArgsArePassed */ true)})); 114 } 115 } 116 117 LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName() 118 << " with type " << *Fn->getFunctionType() << "\n"); 119 addAttributes(FnID, *Fn); 120 121 } else { 122 LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName() 123 << " with type " << *Fn->getFunctionType() << "\n"); 124 } 125 126 assert(Fn && "Failed to create OpenMP runtime function"); 127 128 // Cast the function to the expected type if necessary 129 Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo()); 130 return {FnTy, C}; 131 } 132 133 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { 134 FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); 135 auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee()); 136 assert(Fn && "Failed to create OpenMP runtime function pointer"); 137 return Fn; 138 } 139 140 void OpenMPIRBuilder::initialize() { initializeTypes(M); } 141 142 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) { 143 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 144 SmallVector<BasicBlock *, 32> Blocks; 145 
SmallVector<OutlineInfo, 16> DeferredOutlines; 146 for (OutlineInfo &OI : OutlineInfos) { 147 // Skip functions that have not finalized yet; may happen with nested 148 // function generation. 149 if (Fn && OI.getFunction() != Fn) { 150 DeferredOutlines.push_back(OI); 151 continue; 152 } 153 154 ParallelRegionBlockSet.clear(); 155 Blocks.clear(); 156 OI.collectBlocks(ParallelRegionBlockSet, Blocks); 157 158 Function *OuterFn = OI.getFunction(); 159 CodeExtractorAnalysisCache CEAC(*OuterFn); 160 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, 161 /* AggregateArgs */ false, 162 /* BlockFrequencyInfo */ nullptr, 163 /* BranchProbabilityInfo */ nullptr, 164 /* AssumptionCache */ nullptr, 165 /* AllowVarArgs */ true, 166 /* AllowAlloca */ true, 167 /* Suffix */ ".omp_par"); 168 169 LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); 170 LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() 171 << " Exit: " << OI.ExitBB->getName() << "\n"); 172 assert(Extractor.isEligible() && 173 "Expected OpenMP outlining to be possible!"); 174 175 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); 176 177 LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); 178 LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); 179 assert(OutlinedFn->getReturnType()->isVoidTy() && 180 "OpenMP outlined functions should not return a value!"); 181 182 // For compability with the clang CG we move the outlined function after the 183 // one with the parallel region. 184 OutlinedFn->removeFromParent(); 185 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); 186 187 // Remove the artificial entry introduced by the extractor right away, we 188 // made our own entry block after all. 
189 { 190 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); 191 assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); 192 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); 193 if (AllowExtractorSinking) { 194 // Move instructions from the to-be-deleted ArtificialEntry to the entry 195 // basic block of the parallel region. CodeExtractor may have sunk 196 // allocas/bitcasts for values that are solely used in the outlined 197 // region and do not escape. 198 assert(!ArtificialEntry.empty() && 199 "Expected instructions to sink in the outlined region"); 200 for (BasicBlock::iterator It = ArtificialEntry.begin(), 201 End = ArtificialEntry.end(); 202 It != End;) { 203 Instruction &I = *It; 204 It++; 205 206 if (I.isTerminator()) 207 continue; 208 209 I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); 210 } 211 } 212 OI.EntryBB->moveBefore(&ArtificialEntry); 213 ArtificialEntry.eraseFromParent(); 214 } 215 assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); 216 assert(OutlinedFn && OutlinedFn->getNumUses() == 1); 217 218 // Run a user callback, e.g. to add attributes. 219 if (OI.PostOutlineCB) 220 OI.PostOutlineCB(*OutlinedFn); 221 } 222 223 // Remove work items that have been completed. 224 OutlineInfos = std::move(DeferredOutlines); 225 } 226 227 OpenMPIRBuilder::~OpenMPIRBuilder() { 228 assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); 229 } 230 231 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, 232 IdentFlag LocFlags, 233 unsigned Reserve2Flags) { 234 // Enable "C-mode". 
235 LocFlags |= OMP_IDENT_FLAG_KMPC; 236 237 Value *&Ident = 238 IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; 239 if (!Ident) { 240 Constant *I32Null = ConstantInt::getNullValue(Int32); 241 Constant *IdentData[] = { 242 I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), 243 ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; 244 Constant *Initializer = ConstantStruct::get( 245 cast<StructType>(IdentPtr->getPointerElementType()), IdentData); 246 247 // Look for existing encoding of the location + flags, not needed but 248 // minimizes the difference to the existing solution while we transition. 249 for (GlobalVariable &GV : M.getGlobalList()) 250 if (GV.getType() == IdentPtr && GV.hasInitializer()) 251 if (GV.getInitializer() == Initializer) 252 return Ident = &GV; 253 254 auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(), 255 /* isConstant = */ true, 256 GlobalValue::PrivateLinkage, Initializer); 257 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 258 GV->setAlignment(Align(8)); 259 Ident = GV; 260 } 261 return Builder.CreatePointerCast(Ident, IdentPtr); 262 } 263 264 Type *OpenMPIRBuilder::getLanemaskType() { 265 LLVMContext &Ctx = M.getContext(); 266 Triple triple(M.getTargetTriple()); 267 268 // This test is adequate until deviceRTL has finer grained lane widths 269 return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx); 270 } 271 272 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { 273 Constant *&SrcLocStr = SrcLocStrMap[LocStr]; 274 if (!SrcLocStr) { 275 Constant *Initializer = 276 ConstantDataArray::getString(M.getContext(), LocStr); 277 278 // Look for existing encoding of the location, not needed but minimizes the 279 // difference to the existing solution while we transition. 
280 for (GlobalVariable &GV : M.getGlobalList()) 281 if (GV.isConstant() && GV.hasInitializer() && 282 GV.getInitializer() == Initializer) 283 return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr); 284 285 SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "", 286 /* AddressSpace */ 0, &M); 287 } 288 return SrcLocStr; 289 } 290 291 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, 292 StringRef FileName, 293 unsigned Line, 294 unsigned Column) { 295 SmallString<128> Buffer; 296 Buffer.push_back(';'); 297 Buffer.append(FileName); 298 Buffer.push_back(';'); 299 Buffer.append(FunctionName); 300 Buffer.push_back(';'); 301 Buffer.append(std::to_string(Line)); 302 Buffer.push_back(';'); 303 Buffer.append(std::to_string(Column)); 304 Buffer.push_back(';'); 305 Buffer.push_back(';'); 306 return getOrCreateSrcLocStr(Buffer.str()); 307 } 308 309 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { 310 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); 311 } 312 313 Constant * 314 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { 315 DILocation *DIL = Loc.DL.get(); 316 if (!DIL) 317 return getOrCreateDefaultSrcLocStr(); 318 StringRef FileName = M.getName(); 319 if (DIFile *DIF = DIL->getFile()) 320 if (Optional<StringRef> Source = DIF->getSource()) 321 FileName = *Source; 322 StringRef Function = DIL->getScope()->getSubprogram()->getName(); 323 Function = 324 !Function.empty() ? 
Function : Loc.IP.getBlock()->getParent()->getName(); 325 return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), 326 DIL->getColumn()); 327 } 328 329 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { 330 return Builder.CreateCall( 331 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, 332 "omp_global_thread_num"); 333 } 334 335 OpenMPIRBuilder::InsertPointTy 336 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, 337 bool ForceSimpleCall, bool CheckCancelFlag) { 338 if (!updateToLocation(Loc)) 339 return Loc.IP; 340 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag); 341 } 342 343 OpenMPIRBuilder::InsertPointTy 344 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, 345 bool ForceSimpleCall, bool CheckCancelFlag) { 346 // Build call __kmpc_cancel_barrier(loc, thread_id) or 347 // __kmpc_barrier(loc, thread_id); 348 349 IdentFlag BarrierLocFlags; 350 switch (Kind) { 351 case OMPD_for: 352 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR; 353 break; 354 case OMPD_sections: 355 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS; 356 break; 357 case OMPD_single: 358 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE; 359 break; 360 case OMPD_barrier: 361 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL; 362 break; 363 default: 364 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL; 365 break; 366 } 367 368 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 369 Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), 370 getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; 371 372 // If we are in a cancellable parallel region, barriers are cancellation 373 // points. 374 // TODO: Check why we would force simple calls or to ignore the cancel flag. 375 bool UseCancelBarrier = 376 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); 377 378 Value *Result = 379 Builder.CreateCall(getOrCreateRuntimeFunctionPtr( 380 UseCancelBarrier ? 
OMPRTL___kmpc_cancel_barrier 381 : OMPRTL___kmpc_barrier), 382 Args); 383 384 if (UseCancelBarrier && CheckCancelFlag) 385 emitCancelationCheckImpl(Result, OMPD_parallel); 386 387 return Builder.saveIP(); 388 } 389 390 OpenMPIRBuilder::InsertPointTy 391 OpenMPIRBuilder::createCancel(const LocationDescription &Loc, 392 Value *IfCondition, 393 omp::Directive CanceledDirective) { 394 if (!updateToLocation(Loc)) 395 return Loc.IP; 396 397 // LLVM utilities like blocks with terminators. 398 auto *UI = Builder.CreateUnreachable(); 399 400 Instruction *ThenTI = UI, *ElseTI = nullptr; 401 if (IfCondition) 402 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 403 Builder.SetInsertPoint(ThenTI); 404 405 Value *CancelKind = nullptr; 406 switch (CanceledDirective) { 407 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ 408 case DirectiveEnum: \ 409 CancelKind = Builder.getInt32(Value); \ 410 break; 411 #include "llvm/Frontend/OpenMP/OMPKinds.def" 412 default: 413 llvm_unreachable("Unknown cancel kind!"); 414 } 415 416 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 417 Value *Ident = getOrCreateIdent(SrcLocStr); 418 Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; 419 Value *Result = Builder.CreateCall( 420 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); 421 auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { 422 if (CanceledDirective == OMPD_parallel) { 423 IRBuilder<>::InsertPointGuard IPG(Builder); 424 Builder.restoreIP(IP); 425 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), 426 omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, 427 /* CheckCancelFlag */ false); 428 } 429 }; 430 431 // The actual cancel logic is shared with others, e.g., cancel_barriers. 432 emitCancelationCheckImpl(Result, CanceledDirective, ExitCB); 433 434 // Update the insertion point and remove the terminator we introduced. 
435 Builder.SetInsertPoint(UI->getParent()); 436 UI->eraseFromParent(); 437 438 return Builder.saveIP(); 439 } 440 441 void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, 442 omp::Directive CanceledDirective, 443 FinalizeCallbackTy ExitCB) { 444 assert(isLastFinalizationInfoCancellable(CanceledDirective) && 445 "Unexpected cancellation!"); 446 447 // For a cancel barrier we create two new blocks. 448 BasicBlock *BB = Builder.GetInsertBlock(); 449 BasicBlock *NonCancellationBlock; 450 if (Builder.GetInsertPoint() == BB->end()) { 451 // TODO: This branch will not be needed once we moved to the 452 // OpenMPIRBuilder codegen completely. 453 NonCancellationBlock = BasicBlock::Create( 454 BB->getContext(), BB->getName() + ".cont", BB->getParent()); 455 } else { 456 NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); 457 BB->getTerminator()->eraseFromParent(); 458 Builder.SetInsertPoint(BB); 459 } 460 BasicBlock *CancellationBlock = BasicBlock::Create( 461 BB->getContext(), BB->getName() + ".cncl", BB->getParent()); 462 463 // Jump to them based on the return value. 464 Value *Cmp = Builder.CreateIsNull(CancelFlag); 465 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, 466 /* TODO weight */ nullptr, nullptr); 467 468 // From the cancellation block we finalize all variables and go to the 469 // post finalization block that is known to the FiniCB callback. 470 Builder.SetInsertPoint(CancellationBlock); 471 if (ExitCB) 472 ExitCB(Builder.saveIP()); 473 auto &FI = FinalizationStack.back(); 474 FI.FiniCB(Builder.saveIP()); 475 476 // The continuation block is where code generation continues. 
477 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); 478 } 479 480 IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( 481 const LocationDescription &Loc, InsertPointTy OuterAllocaIP, 482 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 483 FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, 484 omp::ProcBindKind ProcBind, bool IsCancellable) { 485 if (!updateToLocation(Loc)) 486 return Loc.IP; 487 488 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 489 Value *Ident = getOrCreateIdent(SrcLocStr); 490 Value *ThreadID = getOrCreateThreadID(Ident); 491 492 if (NumThreads) { 493 // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) 494 Value *Args[] = { 495 Ident, ThreadID, 496 Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; 497 Builder.CreateCall( 498 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args); 499 } 500 501 if (ProcBind != OMP_PROC_BIND_default) { 502 // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind) 503 Value *Args[] = { 504 Ident, ThreadID, 505 ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; 506 Builder.CreateCall( 507 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args); 508 } 509 510 BasicBlock *InsertBB = Builder.GetInsertBlock(); 511 Function *OuterFn = InsertBB->getParent(); 512 513 // Save the outer alloca block because the insertion iterator may get 514 // invalidated and we still need this later. 515 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock(); 516 517 // Vector to remember instructions we used only during the modeling but which 518 // we want to delete at the end. 519 SmallVector<Instruction *, 4> ToBeDeleted; 520 521 // Change the location to the outer alloca insertion point to create and 522 // initialize the allocas we pass into the parallel region. 
523 Builder.restoreIP(OuterAllocaIP); 524 AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); 525 AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); 526 527 // If there is an if condition we actually use the TIDAddr and ZeroAddr in the 528 // program, otherwise we only need them for modeling purposes to get the 529 // associated arguments in the outlined function. In the former case, 530 // initialize the allocas properly, in the latter case, delete them later. 531 if (IfCondition) { 532 Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); 533 Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); 534 } else { 535 ToBeDeleted.push_back(TIDAddr); 536 ToBeDeleted.push_back(ZeroAddr); 537 } 538 539 // Create an artificial insertion point that will also ensure the blocks we 540 // are about to split are not degenerated. 541 auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); 542 543 Instruction *ThenTI = UI, *ElseTI = nullptr; 544 if (IfCondition) 545 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 546 547 BasicBlock *ThenBB = ThenTI->getParent(); 548 BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); 549 BasicBlock *PRegBodyBB = 550 PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); 551 BasicBlock *PRegPreFiniBB = 552 PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize"); 553 BasicBlock *PRegExitBB = 554 PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); 555 556 auto FiniCBWrapper = [&](InsertPointTy IP) { 557 // Hide "open-ended" blocks from the given FiniCB by setting the right jump 558 // target to the region exit block. 
559 if (IP.getBlock()->end() == IP.getPoint()) { 560 IRBuilder<>::InsertPointGuard IPG(Builder); 561 Builder.restoreIP(IP); 562 Instruction *I = Builder.CreateBr(PRegExitBB); 563 IP = InsertPointTy(I->getParent(), I->getIterator()); 564 } 565 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && 566 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && 567 "Unexpected insertion point for finalization call!"); 568 return FiniCB(IP); 569 }; 570 571 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); 572 573 // Generate the privatization allocas in the block that will become the entry 574 // of the outlined function. 575 Builder.SetInsertPoint(PRegEntryBB->getTerminator()); 576 InsertPointTy InnerAllocaIP = Builder.saveIP(); 577 578 AllocaInst *PrivTIDAddr = 579 Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); 580 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid"); 581 582 // Add some fake uses for OpenMP provided arguments. 583 ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use")); 584 Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr, 585 "zero.addr.use"); 586 ToBeDeleted.push_back(ZeroAddrUse); 587 588 // ThenBB 589 // | 590 // V 591 // PRegionEntryBB <- Privatization allocas are placed here. 592 // | 593 // V 594 // PRegionBodyBB <- BodeGen is invoked here. 595 // | 596 // V 597 // PRegPreFiniBB <- The block we will start finalization from. 598 // | 599 // V 600 // PRegionExitBB <- A common exit to simplify block collection. 601 // 602 603 LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); 604 605 // Let the caller create the body. 
606 assert(BodyGenCB && "Expected body generation callback!"); 607 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); 608 BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB); 609 610 LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); 611 612 FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); 613 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 614 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 615 llvm::LLVMContext &Ctx = F->getContext(); 616 MDBuilder MDB(Ctx); 617 // Annotate the callback behavior of the __kmpc_fork_call: 618 // - The callback callee is argument number 2 (microtask). 619 // - The first two arguments of the callback callee are unknown (-1). 620 // - All variadic arguments to the __kmpc_fork_call are passed to the 621 // callback callee. 622 F->addMetadata( 623 llvm::LLVMContext::MD_callback, 624 *llvm::MDNode::get( 625 Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, 626 /* VarArgsArePassed */ true)})); 627 } 628 } 629 630 OutlineInfo OI; 631 OI.PostOutlineCB = [=](Function &OutlinedFn) { 632 // Add some known attributes. 
633 OutlinedFn.addParamAttr(0, Attribute::NoAlias); 634 OutlinedFn.addParamAttr(1, Attribute::NoAlias); 635 OutlinedFn.addFnAttr(Attribute::NoUnwind); 636 OutlinedFn.addFnAttr(Attribute::NoRecurse); 637 638 assert(OutlinedFn.arg_size() >= 2 && 639 "Expected at least tid and bounded tid as arguments"); 640 unsigned NumCapturedVars = 641 OutlinedFn.arg_size() - /* tid & bounded tid */ 2; 642 643 CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); 644 CI->getParent()->setName("omp_parallel"); 645 Builder.SetInsertPoint(CI); 646 647 // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); 648 Value *ForkCallArgs[] = { 649 Ident, Builder.getInt32(NumCapturedVars), 650 Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; 651 652 SmallVector<Value *, 16> RealArgs; 653 RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); 654 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); 655 656 Builder.CreateCall(RTLFn, RealArgs); 657 658 LLVM_DEBUG(dbgs() << "With fork_call placed: " 659 << *Builder.GetInsertBlock()->getParent() << "\n"); 660 661 InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); 662 663 // Initialize the local TID stack location with the argument value. 664 Builder.SetInsertPoint(PrivTID); 665 Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); 666 Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr); 667 668 // If no "if" clause was present we do not need the call created during 669 // outlining, otherwise we reuse it in the serialized parallel region. 670 if (!ElseTI) { 671 CI->eraseFromParent(); 672 } else { 673 674 // If an "if" clause was present we are now generating the serialized 675 // version into the "else" branch. 
676 Builder.SetInsertPoint(ElseTI); 677 678 // Build calls __kmpc_serialized_parallel(&Ident, GTid); 679 Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; 680 Builder.CreateCall( 681 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), 682 SerializedParallelCallArgs); 683 684 // OutlinedFn(>id, &zero, CapturedStruct); 685 CI->removeFromParent(); 686 Builder.Insert(CI); 687 688 // __kmpc_end_serialized_parallel(&Ident, GTid); 689 Value *EndArgs[] = {Ident, ThreadID}; 690 Builder.CreateCall( 691 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), 692 EndArgs); 693 694 LLVM_DEBUG(dbgs() << "With serialized parallel region: " 695 << *Builder.GetInsertBlock()->getParent() << "\n"); 696 } 697 698 for (Instruction *I : ToBeDeleted) 699 I->eraseFromParent(); 700 }; 701 702 // Adjust the finalization stack, verify the adjustment, and call the 703 // finalize function a last time to finalize values between the pre-fini 704 // block and the exit block if we left the parallel "the normal way". 705 auto FiniInfo = FinalizationStack.pop_back_val(); 706 (void)FiniInfo; 707 assert(FiniInfo.DK == OMPD_parallel && 708 "Unexpected finalization stack state!"); 709 710 Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); 711 712 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); 713 FiniCB(PreFiniIP); 714 715 OI.EntryBB = PRegEntryBB; 716 OI.ExitBB = PRegExitBB; 717 718 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 719 SmallVector<BasicBlock *, 32> Blocks; 720 OI.collectBlocks(ParallelRegionBlockSet, Blocks); 721 722 // Ensure a single exit node for the outlined region by creating one. 723 // We might have multiple incoming edges to the exit now due to finalizations, 724 // e.g., cancel calls that cause the control flow to leave the region. 
725 BasicBlock *PRegOutlinedExitBB = PRegExitBB; 726 PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); 727 PRegOutlinedExitBB->setName("omp.par.outlined.exit"); 728 Blocks.push_back(PRegOutlinedExitBB); 729 730 CodeExtractorAnalysisCache CEAC(*OuterFn); 731 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, 732 /* AggregateArgs */ false, 733 /* BlockFrequencyInfo */ nullptr, 734 /* BranchProbabilityInfo */ nullptr, 735 /* AssumptionCache */ nullptr, 736 /* AllowVarArgs */ true, 737 /* AllowAlloca */ true, 738 /* Suffix */ ".omp_par"); 739 740 // Find inputs to, outputs from the code region. 741 BasicBlock *CommonExit = nullptr; 742 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands; 743 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); 744 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); 745 746 LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); 747 748 FunctionCallee TIDRTLFn = 749 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); 750 751 auto PrivHelper = [&](Value &V) { 752 if (&V == TIDAddr || &V == ZeroAddr) 753 return; 754 755 SetVector<Use *> Uses; 756 for (Use &U : V.uses()) 757 if (auto *UserI = dyn_cast<Instruction>(U.getUser())) 758 if (ParallelRegionBlockSet.count(UserI->getParent())) 759 Uses.insert(&U); 760 761 // __kmpc_fork_call expects extra arguments as pointers. If the input 762 // already has a pointer type, everything is fine. Otherwise, store the 763 // value onto stack and load it back inside the to-be-outlined region. This 764 // will ensure only the pointer will be passed to the function. 765 // FIXME: if there are more than 15 trailing arguments, they must be 766 // additionally packed in a struct. 
767 Value *Inner = &V; 768 if (!V.getType()->isPointerTy()) { 769 IRBuilder<>::InsertPointGuard Guard(Builder); 770 LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n"); 771 772 Builder.restoreIP(OuterAllocaIP); 773 Value *Ptr = 774 Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded"); 775 776 // Store to stack at end of the block that currently branches to the entry 777 // block of the to-be-outlined region. 778 Builder.SetInsertPoint(InsertBB, 779 InsertBB->getTerminator()->getIterator()); 780 Builder.CreateStore(&V, Ptr); 781 782 // Load back next to allocations in the to-be-outlined region. 783 Builder.restoreIP(InnerAllocaIP); 784 Inner = Builder.CreateLoad(V.getType(), Ptr); 785 } 786 787 Value *ReplacementValue = nullptr; 788 CallInst *CI = dyn_cast<CallInst>(&V); 789 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) { 790 ReplacementValue = PrivTID; 791 } else { 792 Builder.restoreIP( 793 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue)); 794 assert(ReplacementValue && 795 "Expected copy/create callback to set replacement value!"); 796 if (ReplacementValue == &V) 797 return; 798 } 799 800 for (Use *UPtr : Uses) 801 UPtr->set(ReplacementValue); 802 }; 803 804 // Reset the inner alloca insertion as it will be used for loading the values 805 // wrapped into pointers before passing them into the to-be-outlined region. 806 // Configure it to insert immediately after the fake use of zero address so 807 // that they are available in the generated body and so that the 808 // OpenMP-related values (thread ID and zero address pointers) remain leading 809 // in the argument list. 810 InnerAllocaIP = IRBuilder<>::InsertPoint( 811 ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator()); 812 813 // Reset the outer alloca insertion point to the entry of the relevant block 814 // in case it was invalidated. 
815 OuterAllocaIP = IRBuilder<>::InsertPoint( 816 OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt()); 817 818 for (Value *Input : Inputs) { 819 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); 820 PrivHelper(*Input); 821 } 822 LLVM_DEBUG({ 823 for (Value *Output : Outputs) 824 LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); 825 }); 826 assert(Outputs.empty() && 827 "OpenMP outlining should not produce live-out values!"); 828 829 LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); 830 LLVM_DEBUG({ 831 for (auto *BB : Blocks) 832 dbgs() << " PBR: " << BB->getName() << "\n"; 833 }); 834 835 // Register the outlined info. 836 addOutlineInfo(std::move(OI)); 837 838 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); 839 UI->eraseFromParent(); 840 841 return AfterIP; 842 } 843 844 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { 845 // Build call void __kmpc_flush(ident_t *loc) 846 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 847 Value *Args[] = {getOrCreateIdent(SrcLocStr)}; 848 849 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); 850 } 851 852 void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) { 853 if (!updateToLocation(Loc)) 854 return; 855 emitFlush(Loc); 856 } 857 858 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { 859 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 860 // global_tid); 861 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 862 Value *Ident = getOrCreateIdent(SrcLocStr); 863 Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; 864 865 // Ignore return result until untied tasks are supported. 
  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
                     Args);
}

/// Public entry point for the 'taskwait' directive; a no-op if \p Loc does not
/// describe a valid insertion point.
void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskwaitImpl(Loc);
}

/// Emit a call to the __kmpc_omp_taskyield runtime entry at the current
/// insertion point. The trailing argument is always 0 here.
void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Constant *I32Null = ConstantInt::getNullValue(Int32);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
                     Args);
}

/// Public entry point for the 'taskyield' directive; a no-op if \p Loc does
/// not describe a valid insertion point.
void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskyieldImpl(Loc);
}

/// Generate IR for a 'sections' construct: the sections are lowered to a
/// canonical loop whose body switches on the induction variable, with one
/// switch case per section callback in \p SectionCBs.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // If the insertion point is not at the end of its block, finalization can
    // run as-is.
    if (IP.getBlock()->end() != IP.getPoint())
      return FiniCB(IP);
    // This must be done otherwise any nested constructs using FinalizeOMPRegion
    // will fail because that function requires the Finalization Basic Block to
    // have a terminator, which is already removed by EmitOMPRegionBody.
    // IP is currently at cancelation block.
    // We need to backtrack to the condition block to fetch
    // the exit block and create a branch from cancelation
    // to exit block.
    IRBuilder<>::InsertPointGuard IPG(Builder);
    Builder.restoreIP(IP);
    // Walk back two predecessors from the cancellation block to reach the
    // conditional block; its second successor is the region exit.
    auto *CaseBB = IP.getBlock()->getSinglePredecessor();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
    Instruction *I = Builder.CreateBr(ExitBB);
    IP = InsertPointTy(I->getParent(), I->getIterator());
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});

  // Each section is emitted as a switch case
  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
  // -> OMP.createSection() which generates the IR for each section
  // Iterate through all sections and emit a switch construct:
  // switch (IV) {
  // case 0:
  //   <SectionStmt[0]>;
  //   break;
  // ...
  // case <NumSection> - 1:
  //   <SectionStmt[<NumSection> - 1]>;
  //   break;
  // }
  // ...
  // section_loop.after:
  // <FiniCB>;
  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
    auto *CurFn = CodeGenIP.getBlock()->getParent();
    // Default switch destination: the loop increment block.
    auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
    // The loop exit, reached via the condition block's second successor; it is
    // forwarded to each section callback as the finalization target.
    auto *ForExitBB = CodeGenIP.getBlock()
                          ->getSinglePredecessor()
                          ->getTerminator()
                          ->getSuccessor(1);
    SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
    Builder.restoreIP(CodeGenIP);
    unsigned CaseNumber = 0;
    for (auto SectionCB : SectionCBs) {
      auto *CaseBB = BasicBlock::Create(M.getContext(),
                                        "omp_section_loop.body.case", CurFn);
      SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
      Builder.SetInsertPoint(CaseBB);
      SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
      CaseNumber++;
    }
    // remove the existing terminator from body BB since there can be no
    // terminators after switch/case
    CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
  };
  // Loop body ends here
  // LowerBound, UpperBound, and Stride
  // for createCanonicalLoop: iterate once per section, [0, NumSections) step 1.
  Type *I32Ty = Type::getInt32Ty(M.getContext());
  Value *LB = ConstantInt::get(I32Ty, 0);
  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
  Value *ST = ConstantInt::get(I32Ty, 1);
  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
      Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
  LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
  BasicBlock *LoopAfterBB = LoopInfo->getAfter();
  Instruction *SplitPos = LoopAfterBB->getTerminator();
  // Guarantee a terminator exists so the block can be split below.
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
  // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
  // which requires a BB with branch
  BasicBlock *ExitBB =
      LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
  SplitPos->eraseFromParent();

  // Apply the finalization callback in LoopAfterBB
  auto FiniInfo = FinalizationStack.pop_back_val();
  assert(FiniInfo.DK == OMPD_sections &&
         "Unexpected finalization stack state!");
  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
  FiniInfo.FiniCB(Builder.saveIP());
  Builder.SetInsertPoint(ExitBB);

  return Builder.saveIP();
}

/// Generate IR for a single 'section' inside a 'sections' construct, emitted
/// as an inlined region with finalization support.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSection(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // If the insertion point is not at the end of its block, finalization can
    // run as-is.
    if (IP.getBlock()->end() != IP.getPoint())
      return FiniCB(IP);
    // This must be done otherwise any nested constructs using FinalizeOMPRegion
    // will fail because that function requires the Finalization Basic Block to
    // have a terminator, which is already removed by EmitOMPRegionBody.
    // IP is currently at cancelation block.
    // We need to backtrack to the condition block to fetch
    // the exit block and create a branch from cancelation
    // to exit block.
    IRBuilder<>::InsertPointGuard IPG(Builder);
    Builder.restoreIP(IP);
    // Here the case block is the block the region was entered from (Loc.IP),
    // unlike in createSections where it is derived from IP itself.
    auto *CaseBB = Loc.IP.getBlock();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
    Instruction *I = Builder.CreateBr(ExitBB);
    IP = InsertPointTy(I->getParent(), I->getIterator());
    return FiniCB(IP);
  };

  Directive OMPD = Directive::OMPD_sections;
  // Since we are using Finalization Callback here, HasFinalize
  // and IsCancellable have to be true
  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
                              /*Conditional*/ false, /*hasFinalize*/ true,
                              /*IsCancellable*/ true);
}

/// Generate IR for a 'master' construct: the region body is guarded by a
/// __kmpc_master / __kmpc_end_master call pair and only executed when the
/// entry call returns non-zero.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

/// Generate IR for a 'masked' construct, like 'master' but selecting the
/// thread(s) denoted by \p Filter via __kmpc_masked / __kmpc_end_masked.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, Value *Filter) {
  if
(!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_masked;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  // The entry call additionally takes the filter value; the exit call does not.
  Value *Args[] = {Ident, ThreadId, Filter};
  Value *ArgsEnd[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

/// Create the basic-block skeleton of a canonical loop (preheader, header,
/// cond, body, latch, exit, after) that iterates \p TripCount times, without
/// connecting it to the surrounding CFG. The first four blocks are inserted
/// before \p PreInsertBefore, the last three before \p PostInsertBefore.
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
    DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
    BasicBlock *PostInsertBefore, const Twine &Name) {
  Module *M = F->getParent();
  LLVMContext &Ctx = M->getContext();
  Type *IndVarTy = TripCount->getType();

  // Create the basic block structure.
  BasicBlock *Preheader =
      BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
  BasicBlock *Header =
      BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
  BasicBlock *Cond =
      BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
  BasicBlock *Body =
      BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
  BasicBlock *Latch =
      BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
  BasicBlock *Exit =
      BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
  BasicBlock *After =
      BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);

  // Use specified DebugLoc for new instructions.
  Builder.SetCurrentDebugLocation(DL);

  Builder.SetInsertPoint(Preheader);
  Builder.CreateBr(Header);

  Builder.SetInsertPoint(Header);
  // Induction variable: starts at 0 from the preheader; the latch incoming
  // value is added below once the increment exists.
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  Builder.CreateBr(Cond);

  Builder.SetInsertPoint(Cond);
  // Unsigned comparison against the trip count; canonical loops always count
  // from 0 upward.
  Value *Cmp =
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);

  Builder.SetInsertPoint(Body);
  Builder.CreateBr(Latch);

  Builder.SetInsertPoint(Latch);
  // IV increments by 1 each iteration; NUW holds because IV < TripCount.
  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
                                  "omp_" + Name + ".next", /*HasNUW=*/true);
  Builder.CreateBr(Header);
  IndVarPHI->addIncoming(Next, Latch);

  Builder.SetInsertPoint(Exit);
  Builder.CreateBr(After);

  // Remember and return the canonical control flow.
  LoopInfos.emplace_front();
  CanonicalLoopInfo *CL = &LoopInfos.front();

  CL->Preheader = Preheader;
  CL->Header = Header;
  CL->Cond = Cond;
  CL->Body = Body;
  CL->Latch = Latch;
  CL->Exit = Exit;
  CL->After = After;

  CL->IsValid = true;

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

/// Create a canonical loop with \p TripCount iterations, spliced into the CFG
/// at \p Loc, and invoke \p BodyGenCB to populate the loop body.
CanonicalLoopInfo *
OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
                                     LoopBodyGenCallbackTy BodyGenCB,
                                     Value *TripCount, const Twine &Name) {
  BasicBlock *BB = Loc.IP.getBlock();
  BasicBlock *NextBB = BB->getNextNode();

  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
                                             NextBB, NextBB, Name);
  BasicBlock *After = CL->getAfter();

  // If location is not set, don't connect the loop.
  if (updateToLocation(Loc)) {
    // Split the loop at the insertion point: Branch to the preheader and move
    // every following instruction to after the loop (the After BB). Also, the
    // new successor is the loop's after block.
    Builder.CreateBr(CL->Preheader);
    After->getInstList().splice(After->begin(), BB->getInstList(),
                                Builder.GetInsertPoint(), BB->end());
    After->replaceSuccessorsPhiUsesWith(BB, After);
  }

  // Emit the body content. We do it after connecting the loop to the CFG to
  // avoid that the callback encounters degenerate BBs.
  BodyGenCB(CL->getBodyIP(), CL->getIndVar());

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

/// Create a canonical loop from a Start/Stop/Step description: the trip count
/// is computed (at \p ComputeIP if set, otherwise at \p Loc) and the body
/// callback receives an induction variable mapped back to the user's range.
CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
    const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    InsertPointTy ComputeIP, const Twine &Name) {

  // Consider the following difficulties (assuming 8-bit signed integers):
  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
  //   DO I = 1, 100, 50
  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
  //   DO I = 100, 0, -128

  // Start, Stop and Step must be of the same integer type.
  auto *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");

  // Emit the trip-count computation at ComputeIP when provided, else inline at
  // the loop location.
  LocationDescription ComputeLoc =
      ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
  updateToLocation(ComputeLoc);

  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  ConstantInt *One = ConstantInt::get(IndVarTy, 1);

  // Like Step, but always positive.
  Value *Incr = Step;

  // Distance between Start and Stop; always positive.
  Value *Span;

  // Condition whether there are no iterations are executed at all, e.g. because
  // UB < LB.
  Value *ZeroCmp;

  if (IsSigned) {
    // Ensure that increment is positive. If not, negate and invert LB and UB.
    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
    // UB - LB cannot wrap in the signed sense given LB <= UB by construction.
    Span = Builder.CreateSub(UB, LB, "", false, true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  } else {
    Span = Builder.CreateSub(Stop, Start, "", true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  }

  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // Avoid incrementing past stop since it could overflow.
    Value *CountIfTwo = Builder.CreateAdd(
        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
    Value *OneCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }
  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                                          "omp_" + Name + ".tripcount");

  // Map the canonical [0, TripCount) induction variable back to the user's
  // range: IndVar = Start + IV * Step.
  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
    Builder.restoreIP(CodeGenIP);
    Value *Span = Builder.CreateMul(IV, Step);
    Value *IndVar = Builder.CreateAdd(Span, Start);
    BodyGenCB(Builder.saveIP(), IndVar);
  };
  LocationDescription LoopLoc = ComputeIP.isSet() ?
Loc.IP : Builder.saveIP();
  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}

// Returns an LLVM function to call for initializing loop bounds using OpenMP
// static scheduling depending on `type`. Only i32 and i64 are supported by the
// runtime. Always interpret integers as unsigned similarly to
// CanonicalLoopInfo.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
                                                  OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

// Sets the number of loop iterations to the given value. This value must be
// valid in the condition block (i.e., defined in the preheader) and is
// interpreted as an unsigned integer.
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
  // The first instruction of the condition block is the IV-vs-trip-count
  // compare created by createLoopSkeleton; replace its RHS.
  Instruction *CmpI = &CLI->getCond()->front();
  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  CmpI->setOperand(1, TripCount);
  CLI->assertOK();
}

/// Lower the canonical loop \p CLI to a statically-scheduled workshare loop:
/// bounds are computed by __kmpc_for_static_init and the loop is rewritten to
/// iterate over the thread-local chunk. Returns nullptr if \p Loc is unset.
CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
  // Set up the source location value for OpenMP runtime.
  if (!updateToLocation(Loc))
    return nullptr;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Constant *Zero = ConstantInt::get(IVTy, 0);
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  // A null chunk means "no chunking": pass 1 to the runtime.
  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  Builder.CreateCall(StaticInit,
                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
                      PUpperBound, PStride, One, Chunk});
  // The runtime wrote this thread's chunk into the bound slots; derive the
  // thread-local trip count from the (inclusive) upper bound.
  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  setCanonicalLoopTripCount(CLI, TripCount);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  // TODO: this can eventually move to CanonicalLoopInfo or to a new
  // CanonicalLoopInfoUpdater interface.
  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
    auto *Instr = dyn_cast<Instruction>(U.getUser());
    return !Instr ||
           (Instr->getParent() != CLI->getCond() &&
            Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(CLI->getExit(),
                         CLI->getExit()->getTerminator()->getIterator());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier)
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);

  CLI->assertOK();
  return CLI;
}

/// Lower \p CLI to a workshare loop using the default schedule.
CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier) {
  // Currently only supports static schedules.
  return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
}

/// Returns an LLVM function to call for initializing loop bounds using OpenMP
/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
/// the runtime. Always interpret integers as unsigned similarly to
/// CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

/// Returns an LLVM function to call for updating the next loop using OpenMP
/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
/// the runtime. Always interpret integers as unsigned similarly to
/// CanonicalLoopInfo.
static FunctionCallee
getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

/// Lower the canonical loop \p CLI to a dynamically-scheduled workshare loop:
/// an outer dispatch loop around the existing one repeatedly asks the runtime
/// (__kmpc_dispatch_next_*) for the next chunk to execute. Note that the CLI
/// is invalidated by this transformation; the returned insert point is after
/// the loop.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
    Value *Chunk) {
  // Set up the source location value for OpenMP runtime.
  Builder.SetCurrentDebugLocation(Loc.DL);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  BasicBlock *PreHeader = CLI->getPreheader();
  Builder.SetInsertPoint(PreHeader->getTerminator());
  Constant *One = ConstantInt::get(IVTy, 1);
  // Dynamic dispatch uses a 1-based lower bound here.
  Builder.CreateStore(One, PLowerBound);
  Value *UpperBound = CLI->getTripCount();
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  // Capture the blocks and after-IP before the CFG is rewritten below.
  BasicBlock *Header = CLI->getHeader();
  BasicBlock *Exit = CLI->getExit();
  BasicBlock *Cond = CLI->getCond();
  InsertPointTy AfterIP = CLI->getAfterIP();

  // The CLI will be "broken" in the code below, as the loop is no longer
  // a valid canonical loop.
  // A null chunk means "no chunking": pass 1 to the runtime.
  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));

  // Call the "init" function.
  Builder.CreateCall(DynamicInit,
                     {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
                      UpperBound, /* step */ One, Chunk});

  // An outer loop around the existing one.
  BasicBlock *OuterCond = BasicBlock::Create(
      PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
      PreHeader->getParent());
  // This needs to be 32-bit always, so can't use the IVTy Zero above.
  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
  // dispatch_next returns non-zero while there is another chunk to run and
  // fills in the bound slots for it.
  Value *Res =
      Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
                                       PLowerBound, PUpperBound, PStride});
  Constant *Zero32 = ConstantInt::get(I32Type, 0);
  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
  // Convert the runtime's 1-based lower bound back to the 0-based IV start.
  Value *LowerBound =
      Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
  Builder.CreateCondBr(MoreWork, Header, Exit);

  // Change PHI-node in loop header to use outer cond rather than preheader,
  // and set IV to the LowerBound.
  Instruction *Phi = &Header->front();
  auto *PI = cast<PHINode>(Phi);
  PI->setIncomingBlock(0, OuterCond);
  PI->setIncomingValue(0, LowerBound);

  // Then set the pre-header to jump to the OuterCond
  Instruction *Term = PreHeader->getTerminator();
  auto *Br = cast<BranchInst>(Term);
  Br->setSuccessor(0, OuterCond);

  // Modify the inner condition:
  // * Use the UpperBound returned from the DynamicNext call.
  // * jump to the loop outer loop when done with one of the inner loops.
  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
  // The instruction at the insertion point is the IV compare created by
  // createLoopSkeleton; rewire its RHS to the per-chunk upper bound.
  Instruction *Comp = &*Builder.GetInsertPoint();
  auto *CI = cast<CmpInst>(Comp);
  CI->setOperand(1, UpperBound);
  // Redirect the inner exit to branch to outer condition.
  Instruction *Branch = &Cond->back();
  auto *BI = cast<BranchInst>(Branch);
  assert(BI->getSuccessor(1) == Exit);
  BI->setSuccessor(1, OuterCond);

  // Add the barrier if requested.
  if (NeedsBarrier) {
    Builder.SetInsertPoint(&Exit->back());
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);
  }

  return AfterIP;
}

/// Make \p Source branch to \p Target.
///
/// Handles two situations:
/// * \p Source already has an unconditional branch.
/// * \p Source is a degenerate block (no terminator because the BB is
///   the current head of the IR construction).
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  if (Instruction *Term = Source->getTerminator()) {
    auto *Br = cast<BranchInst>(Term);
    assert(!Br->isConditional() &&
           "BB's terminator must be an unconditional branch (or degenerate)");
    // Keep PHIs in the old successor consistent before retargeting.
    BasicBlock *Succ = Br->getSuccessor(0);
    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
    Br->setSuccessor(0, Target);
    return;
  }

  auto *NewBr = BranchInst::Create(Target, Source);
  NewBr->setDebugLoc(DL);
}

/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
                                      BasicBlock *NewTarget, DebugLoc DL) {
  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
    redirectTo(Pred, NewTarget, DL);
}

/// Determine which blocks in \p BBs are reachable from outside and remove the
/// ones that are not reachable from the function.
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  // True if BB is used by an instruction that lives outside the erase set.
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  // Fixed-point iteration: keeping one block alive may keep others alive too.
  while (true) {
    bool Changed = false;
    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);
        Changed = true;
      }
    }
    if (!Changed)
      break;
  }

  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}

CanonicalLoopInfo *
OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                               InsertPointTy ComputeIP) {
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();

  // Nothing to do if there is already just one loop.
  if (NumLoops == 1)
    return Loops.front();

  CanonicalLoopInfo *Outermost = Loops.front();
  CanonicalLoopInfo *Innermost = Loops.back();
  BasicBlock *OrigPreheader = Outermost->getPreheader();
  BasicBlock *OrigAfter = Outermost->getAfter();
  Function *F = OrigPreheader->getParent();

  // Setup the IRBuilder for inserting the trip count computation.
  Builder.SetCurrentDebugLocation(DL);
  if (ComputeIP.isSet())
    Builder.restoreIP(ComputeIP);
  else
    Builder.restoreIP(Outermost->getPreheaderIP());

  // Derive the collapsed loop's trip count: the product of all input trip
  // counts.
  // TODO: Find common/largest indvar type.
  Value *CollapsedTripCount = nullptr;
  for (CanonicalLoopInfo *L : Loops) {
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      continue;
    }

    // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
    CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
                                           {}, /*HasNUW=*/true);
  }

  // Create the collapsed loop control flow.
  CanonicalLoopInfo *Result =
      createLoopSkeleton(DL, CollapsedTripCount, F,
                         OrigPreheader->getNextNode(), OrigAfter, "collapsed");

  // Build the collapsed loop body code.
  // Start with deriving the input loop induction variables from the collapsed
  // one, using a divmod scheme. To preserve the original loops' order, the
  // innermost loop use the least significant bits.
  Builder.restoreIP(Result->getBodyIP());

  Value *Leftover = Result->getIndVar();
  SmallVector<Value *> NewIndVars;
  NewIndVars.set_size(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();

    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;

    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  // Outermost loop gets all the remaining bits.
  NewIndVars[0] = Leftover;

  // Construct the loop body control flow.
  // We progressively construct the branch structure following in direction of
  // the control flow, from the leading in-between code, the loop nest body, the
  // trailing in-between code, and rejoining the collapsed loop's latch.
  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
  // the ContinueBlock is set, continue with that block. If ContinuePred, use
  // its predecessors as sources.
  BasicBlock *ContinueBlock = Result->getBody();
  BasicBlock *ContinuePred = nullptr;
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
                                                          BasicBlock *NextSrc) {
    if (ContinueBlock)
      redirectTo(ContinueBlock, Dest, DL);
    else
      redirectAllPredecessorsTo(ContinuePred, Dest, DL);

    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };

  // The code before the nested loop of each level.
  // Because we are sinking it into the nest, it will be executed more often
  // than the original loop. More sophisticated schemes could keep track of what
  // the in-between code is and instantiate it only once per thread.
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());

  // Connect the loop nest body.
  ContinueWith(Innermost->getBody(), Innermost->getLatch());

  // The code after the nested loop at each level.
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());

  // Connect the finished loop to the collapsed loop latch.
  ContinueWith(Result->getLatch(), nullptr);

  // Replace the input loops with the new collapsed loop.
  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);

  // Replace the input loop indvars with the derived ones.
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);

  // Remove unused parts of the input loops.
  // Collect and erase the now-unused control-flow blocks of the input loops;
  // their bodies have already been rewired into the collapsed loop.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  Result->assertOK();
#endif
  return Result;
}

// Tile a rectangular loop nest described by \p Loops with the given
// \p TileSizes. For every input loop a "floor" loop (iterating over the
// tiles) and a "tile" loop (iterating within a single tile) are generated;
// the returned vector holds the NumLoops floor loops followed by the
// NumLoops tile loops.
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Collect original trip counts and induction variable to be accessible by
  // index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs
  // of any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable within the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into the body of the
  // corresponding tile loop.
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  for (int i = 0; i < NumLoops - 1; ++i) {
    CanonicalLoopInfo *Surrounding = Loops[i];
    CanonicalLoopInfo *Nested = Loops[i + 1];

    BasicBlock *EnterBB = Surrounding->getBody();
    BasicBlock *ExitBB = Nested->getHeader();
    InbetweenCode.emplace_back(EnterBB, ExitBB);
  }

  // Compute the trip counts of the floor loops.
  Builder.SetCurrentDebugLocation(DL);
  Builder.restoreIP(OutermostLoop->getPreheaderIP());
  SmallVector<Value *, 4> FloorCount, FloorRems;
  for (int i = 0; i < NumLoops; ++i) {
    Value *TileSize = TileSizes[i];
    Value *OrigTripCount = OrigTripCounts[i];
    Type *IVType = OrigTripCount->getType();

    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);

    // 0 if tripcount divides the tilesize, 1 otherwise.
    // 1 means we need an additional iteration for a partial tile.
    //
    // Unfortunately we cannot just use the roundup-formula
    //   (tripcount + tilesize - 1)/tilesize
    // because the summation might overflow. We do not want to introduce
    // undefined behavior when the untiled loop nest did not.
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));

    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    FloorTripCount =
        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount",
                          /*HasNUW=*/true);

    // Remember some values for later use.
    // FloorCount holds the rounded-up (ceil) trip count of each floor loop;
    // FloorRems holds the size of the partial (epilogue) tile, if any.
    FloorCount.push_back(FloorTripCount);
    FloorRems.push_back(FloorTripRem);
  }

  // Generate the new loop nest, from the outermost to the innermost.
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  // The basic block of the surrounding loop that enters the nest generated
  // loop.
  BasicBlock *Enter = OutermostLoop->getPreheader();

  // The basic block of the surrounding loop where the inner code should
  // continue.
  BasicBlock *Continue = OutermostLoop->getAfter();

  // Where the next loop basic block should be inserted.
  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();

  // Creates one loop skeleton with the given trip count and splices it
  // between Enter and Continue, then updates Enter/Continue/OutroInsertBefore
  // so the next generated loop nests inside this one.
  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);

    // Setup the position where the next embedded loop connects to this loop.
    Enter = EmbeddedLoop->getBody();
    Continue = EmbeddedLoop->getLatch();
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  // Generates one nested loop per trip count and records it in Result.
  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");

  // Within the innermost floor loop, emit the code that computes the tile
  // sizes.
  Builder.SetInsertPoint(Enter->getTerminator());
  SmallVector<Value *, 4> TileCounts;
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    Value *TileSize = TileSizes[i];

    // Select the remainder as the trip count for the partial (epilogue)
    // tile, and the full tile size otherwise.
    // NOTE(review): FloorLoop->getIndVar() takes values in
    // [0, FloorCount[i]), so equality with FloorCount[i] looks unreachable;
    // the epilogue iteration index would be FloorCount[i]-1 when
    // FloorRems[i] != 0 — confirm against the tiling unit tests.
    Value *FloorIsEpilogue =
        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
    Value *TileTripCount =
        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);

    TileCounts.push_back(TileTripCount);
  }

  // Create the tile loops.
  EmbeddNewLoops(TileCounts, "tile");

  // Insert the inbetween code into the body.
  // BodyEnter/BodyEntered track the splice point exactly like
  // ContinueBlock/ContinuePred do in collapseLoops: if BodyEnter is set,
  // branch from it; otherwise redirect all predecessors of BodyEntered.
  BasicBlock *BodyEnter = Enter;
  BasicBlock *BodyEntered = nullptr;
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BasicBlock *EnterBB = P.first;
    BasicBlock *ExitBB = P.second;

    if (BodyEnter)
      redirectTo(BodyEnter, EnterBB, DL);
    else
      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);

    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }

  // Append the original loop nest body into the generated loop nest body.
  if (BodyEnter)
    redirectTo(BodyEnter, InnerEnter, DL);
  else
    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  redirectAllPredecessorsTo(InnerLatch, Continue, DL);

  // Replace the original induction variable with an induction variable
  // computed from the tile and floor induction variables:
  //   OrigIndVar = TileSize * FloorIndVar + TileIndVar.
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
    Value *OrigIndVar = OrigIndVars[i];
    Value *Size = TileSizes[i];

    Value *Scale =
        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
    Value *Shift =
        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
    OrigIndVar->replaceAllUsesWith(Shift);
  }

  // Remove unused parts of the original loops.
  // Collect and erase the now-unused control-flow blocks of the input loops;
  // their bodies have already been rewired into the generated loop nest.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  for (CanonicalLoopInfo *GenL : Result)
    GenL->assertOK();
#endif
  return Result;
}

// Emit a call to __kmpc_copyprivate(ident, tid, BufSize, CpyBuf, CpyFn,
// <load of DidIt>) at \p Loc, used to broadcast the values produced by the
// 'single' thread to the other threads.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
                                   llvm::Value *CpyFn, llvm::Value *DidIt) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);

  // The runtime expects the *value* of the did-it flag, not its address.
  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);

  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
  Builder.CreateCall(Fn, Args);

  return Builder.saveIP();
}

// Emit an OpenMP 'single' region: the body (BodyGenCB) is only executed by
// the thread for which __kmpc_single returns non-zero; __kmpc_end_single
// closes the region. \p DidIt, if non-null, is zero-initialized here (the
// winning thread is expected to set it for a later copyprivate).
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  // If needed (i.e. not null), initialize `DidIt` with 0.
  if (DidIt) {
    Builder.CreateStore(Builder.getInt32(0), DidIt);
  }

  Directive OMPD = Directive::OMPD_single;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // generates the following:
  // if (__kmpc_single()) {
  //   .... single region ...
  //   __kmpc_end_single
  // }

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

// Emit an OpenMP 'critical' region guarded by the named region lock returned
// by getOMPCriticalRegionLock. When \p HintInst is given, enter via
// __kmpc_critical_with_hint instead of __kmpc_critical.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_critical;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};

  SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
  Function *RTFn = nullptr;
  if (HintInst) {
    // Add Hint to entry Args and create call.
    EnterArgs.push_back(HintInst);
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
  } else {
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
  }
  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);

  // The exit call never takes the hint, only {ident, tid, lock}.
  Function *ExitRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}

// Emit an inlined OpenMP region at the current insertion point:
// \p EntryCall, then the body produced by \p BodyGenCB, then finalization
// (FiniCB, if \p HasFinalize) and \p ExitCall. With \p Conditional the body
// only runs when EntryCall returns non-zero (see emitCommonDirectiveEntry).
// Returns the insertion point after the region.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize, bool IsCancellable) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});

  // Create inlined region's entry and body blocks, in preparation
  // for conditional creation.
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  // Ensure there is something to split on; a temporary unreachable acts as
  // the terminator when the block has none (or a non-branch one).
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // Generate the body.
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP(), *FiniBB);

  // If we didn't emit a branch to FiniBB during body generation, it means
  // FiniBB is unreachable (e.g. while(1);). stop generating all the
  // unreachable blocks, and remove anything we are not going to use.
  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  if (SkipEmittingRegion) {
    FiniBB->eraseFromParent();
    ExitCall->eraseFromParent();
    // Discard finalization if we have it.
    if (HasFinalize) {
      assert(!FinalizationStack.empty() &&
             "Unexpected finalization stack state!");
      FinalizationStack.pop_back();
    }
  } else {
    // Emit exit call and do any needed finalization.
    auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
    assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
           FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
           "Unexpected control flow graph state!!");
    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
    assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
           "Unexpected Control Flow State!");
    MergeBlockIntoPredecessor(FiniBB);
  }

  // If we are skipping the region of a non conditional, remove the exit
  // block, and clear the builder's insertion point.
  assert(SplitPos->getParent() == ExitBB &&
         "Unexpected Insertion point location!");
  if (!Conditional && SkipEmittingRegion) {
    ExitBB->eraseFromParent();
    Builder.ClearInsertionPoint();
  } else {
    auto merged = MergeBlockIntoPredecessor(ExitBB);
    BasicBlock *ExitPredBB = SplitPos->getParent();
    auto InsertBB = merged ? ExitPredBB : ExitBB;
    // Drop the temporary unreachable terminator inserted above, if any.
    if (!isa_and_nonnull<BranchInst>(SplitPos))
      SplitPos->eraseFromParent();
    Builder.SetInsertPoint(InsertBB);
  }

  return Builder.saveIP();
}

// Emit the (possibly conditional) entry of a directive region: when
// \p Conditional, branch to the region body only if \p EntryCall returned
// non-zero, otherwise fall through to \p ExitBB.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  // If nothing to do, return current insertion point.
2035 if (!Conditional || !EntryCall) 2036 return Builder.saveIP(); 2037 2038 BasicBlock *EntryBB = Builder.GetInsertBlock(); 2039 Value *CallBool = Builder.CreateIsNotNull(EntryCall); 2040 auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); 2041 auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); 2042 2043 // Emit thenBB and set the Builder's insertion point there for 2044 // body generation next. Place the block after the current block. 2045 Function *CurFn = EntryBB->getParent(); 2046 CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); 2047 2048 // Move Entry branch to end of ThenBB, and replace with conditional 2049 // branch (If-stmt) 2050 Instruction *EntryBBTI = EntryBB->getTerminator(); 2051 Builder.CreateCondBr(CallBool, ThenBB, ExitBB); 2052 EntryBBTI->removeFromParent(); 2053 Builder.SetInsertPoint(UI); 2054 Builder.Insert(EntryBBTI); 2055 UI->eraseFromParent(); 2056 Builder.SetInsertPoint(ThenBB->getTerminator()); 2057 2058 // return an insertion point to ExitBB. 
2059 return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); 2060 } 2061 2062 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( 2063 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, 2064 bool HasFinalize) { 2065 2066 Builder.restoreIP(FinIP); 2067 2068 // If there is finalization to do, emit it before the exit call 2069 if (HasFinalize) { 2070 assert(!FinalizationStack.empty() && 2071 "Unexpected finalization stack state!"); 2072 2073 FinalizationInfo Fi = FinalizationStack.pop_back_val(); 2074 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); 2075 2076 Fi.FiniCB(FinIP); 2077 2078 BasicBlock *FiniBB = FinIP.getBlock(); 2079 Instruction *FiniBBTI = FiniBB->getTerminator(); 2080 2081 // set Builder IP for call creation 2082 Builder.SetInsertPoint(FiniBBTI); 2083 } 2084 2085 if (!ExitCall) 2086 return Builder.saveIP(); 2087 2088 // place the Exitcall as last instruction before Finalization block terminator 2089 ExitCall->removeFromParent(); 2090 Builder.Insert(ExitCall); 2091 2092 return IRBuilder<>::InsertPoint(ExitCall->getParent(), 2093 ExitCall->getIterator()); 2094 } 2095 2096 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks( 2097 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, 2098 llvm::IntegerType *IntPtrTy, bool BranchtoEnd) { 2099 if (!IP.isSet()) 2100 return IP; 2101 2102 IRBuilder<>::InsertPointGuard IPG(Builder); 2103 2104 // creates the following CFG structure 2105 // OMP_Entry : (MasterAddr != PrivateAddr)? 
  //   F                  T
  //   |                  \
  //   |     copyin.not.master
  //   |                  /
  //   v                 /
  //   copyin.not.master.end
  //   |
  //   v
  //   OMP.Entry.Next

  BasicBlock *OMP_Entry = IP.getBlock();
  Function *CurFn = OMP_Entry->getParent();
  BasicBlock *CopyBegin =
      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  BasicBlock *CopyEnd = nullptr;

  // If entry block is terminated, split to preserve the branch to following
  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
    CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
                                         "copyin.not.master.end");
    OMP_Entry->getTerminator()->eraseFromParent();
  } else {
    CopyEnd =
        BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
  }

  // Compare the two addresses as integers; copyin is only needed on threads
  // whose private copy is distinct from the master copy.
  Builder.SetInsertPoint(OMP_Entry);
  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);

  Builder.SetInsertPoint(CopyBegin);
  if (BranchtoEnd)
    // Leave the insertion point *before* the branch so the caller's copy
    // code is emitted inside copyin.not.master.
    Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));

  return Builder.saveIP();
}

// Emit a call to __kmpc_alloc(tid, Size, Allocator) at \p Loc and return the
// call (the allocated pointer).
CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
                                          Value *Size, Value *Allocator,
                                          std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Size, Allocator};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);

  return Builder.CreateCall(Fn, Args, Name);
}

// Emit a call to __kmpc_free(tid, Addr, Allocator) at \p Loc, releasing
// memory obtained from createOMPAlloc.
CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
                                         Value *Addr, Value *Allocator,
                                         std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Addr, Allocator};
  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
  return Builder.CreateCall(Fn, Args, Name);
}

// Emit a call to __kmpc_threadprivate_cached for \p Pointer of \p Size bytes,
// backed by an internal cache variable created on demand.
CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
    const LocationDescription &Loc, llvm::Value *Pointer,
    llvm::ConstantInt *Size, const llvm::Twine &Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Constant *ThreadPrivateCache =
      getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};

  Function *Fn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);

  return Builder.CreateCall(Fn, Args);
}

// Emit the device (target) entry sequence: call __kmpc_target_init and
// branch to the user code only for threads where it returns -1; all other
// threads return immediately via the worker.exit block.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                                  bool RequiresFullRuntime) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
  // The generic state machine is only used in non-SPMD mode.
  ConstantInt *UseGenericStateMachine =
      ConstantInt::getBool(Int32->getContext(), !IsSPMD);
  ConstantInt *RequiresFullRuntimeVal =
      ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);

  Function *Fn = getOrCreateRuntimeFunctionPtr(
      omp::RuntimeFunction::OMPRTL___kmpc_target_init);

  CallInst *ThreadKind = Builder.CreateCall(
      Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});

  Value *ExecUserCode = Builder.CreateICmpEQ(
      ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
      "exec_user_code");

  // ThreadKind = __kmpc_target_init(...)
  // if (ThreadKind == -1)
  //   user_code
  // else
  //   return;

  // Temporary unreachable used as a split point; replaced by the conditional
  // branch below and then erased.
  auto *UI = Builder.CreateUnreachable();
  BasicBlock *CheckBB = UI->getParent();
  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");

  BasicBlock *WorkerExitBB = BasicBlock::Create(
      CheckBB->getContext(), "worker.exit", CheckBB->getParent());
  Builder.SetInsertPoint(WorkerExitBB);
  Builder.CreateRetVoid();

  auto *CheckBBTI = CheckBB->getTerminator();
  Builder.SetInsertPoint(CheckBBTI);
  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);

  CheckBBTI->eraseFromParent();
  UI->eraseFromParent();

  // Continue in the "user_code" block, see diagram above and in
  // openmp/libomptarget/deviceRTLs/common/include/target.h .
2240 return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); 2241 } 2242 2243 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, 2244 bool IsSPMD, bool RequiresFullRuntime) { 2245 if (!updateToLocation(Loc)) 2246 return; 2247 2248 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2249 Value *Ident = getOrCreateIdent(SrcLocStr); 2250 ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); 2251 ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); 2252 2253 Function *Fn = getOrCreateRuntimeFunctionPtr( 2254 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); 2255 2256 Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal}); 2257 } 2258 2259 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, 2260 StringRef FirstSeparator, 2261 StringRef Separator) { 2262 SmallString<128> Buffer; 2263 llvm::raw_svector_ostream OS(Buffer); 2264 StringRef Sep = FirstSeparator; 2265 for (StringRef Part : Parts) { 2266 OS << Sep << Part; 2267 Sep = Separator; 2268 } 2269 return OS.str().str(); 2270 } 2271 2272 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( 2273 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2274 // TODO: Replace the twine arg with stringref to get rid of the conversion 2275 // logic. However This is taken from current implementation in clang as is. 2276 // Since this method is used in many places exclusively for OMP internal use 2277 // we will keep it as is for temporarily until we move all users to the 2278 // builder and then, if possible, fix it everywhere in one go. 
  // Render the Twine once; the StringMap copies the key on insertion.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: the previously created global must have the requested type.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
  } else {
    // TODO: investigate the appropriate linkage type used for the global
    // variable for possibly changing that to internal or private, or maybe
    // create different versions of the function for different OMP internal
    // variables.
    Elem.second = new llvm::GlobalVariable(
        M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
        llvm::Constant::getNullValue(Ty), Elem.first(),
        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
        AddressSpace);
  }

  return Elem.second;
}

// Return the internal lock variable guarding the 'critical' region with the
// given name, named "gomp_critical_user_<name>.var".
Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
}

// Create a private constant global array holding the offload map-type flags
// for a target region's map clauses.
GlobalVariable *
OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                       std::string VarName) {
  llvm::Constant *MaptypesArrayInit =
      llvm::ConstantDataArray::get(M.getContext(), Mappings);
  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
      M, MaptypesArrayInit->getType(),
      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
      VarName);
  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  return MaptypesArrayGlobal;
}

// Create the stack allocations (base pointers, pointers, sizes) used to pass
// \p NumOperands map operands to an offloading runtime call; results are
// stored into \p MapperAllocas. Allocas are emitted at \p AllocaIP, then the
// builder is returned to \p Loc.
void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
                                          InsertPointTy AllocaIP,
                                          unsigned NumOperands,
                                          struct MapperAllocas &MapperAllocas) {
  if (!updateToLocation(Loc))
    return;

  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Builder.restoreIP(AllocaIP);
  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
  Builder.restoreIP(Loc.IP);
  MapperAllocas.ArgsBase = ArgsBase;
  MapperAllocas.Args = Args;
  MapperAllocas.ArgSizes = ArgSizes;
}

// Emit a call to the given offloading runtime function \p MapperFunc,
// decaying each array alloca from createMapperAllocas to a pointer to its
// first element.
void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
                                     Function *MapperFunc, Value *SrcLocInfo,
                                     Value *MaptypesArg, Value *MapnamesArg,
                                     struct MapperAllocas &MapperAllocas,
                                     int64_t DeviceID, unsigned NumOperands) {
  if (!updateToLocation(Loc))
    return;

  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Value *ArgsBaseGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgsGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgSizesGEP =
      Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  // No mapper functions array is passed (last argument).
  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
  Builder.CreateCall(MapperFunc,
                     {SrcLocInfo, Builder.getInt64(DeviceID),
                      Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
                      ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
}

// Decide whether an explicit flush is required after an atomic construct of
// kind \p AK with ordering \p AO, and emit it. Returns true iff a flush was
// emitted.
bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
    const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
  assert(!(AO == AtomicOrdering::NotAtomic ||
           AO == llvm::AtomicOrdering::Unordered) &&
         "Unexpected Atomic Ordering.");

  bool Flush = false;
  llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;

  switch (AK) {
  case Read:
    if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
    }
    break;
  case Write:
  case Update:
    if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Release;
      Flush = true;
    }
    break;
  case Capture:
    switch (AO) {
    case AtomicOrdering::Acquire:
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
      break;
    case AtomicOrdering::Release:
      FlushAO = AtomicOrdering::Release;
      Flush = true;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      FlushAO = AtomicOrdering::AcquireRelease;
      Flush = true;
      break;
    default:
      // do nothing - leave silently.
      break;
    }
  }

  if (Flush) {
    // Currently Flush RT call still doesn't take memory_ordering, so for when
    // that happens, this tries to do the resolution of which atomic ordering
    // to use with but issue the flush call
    // TODO: pass `FlushAO` after memory ordering support is added
    (void)FlushAO;
    emitFlush(Loc);
  }

  // for AO == AtomicOrdering::Monotonic and all other case combinations
  // do nothing
  return Flush;
}

// Emit an OpenMP atomic read of X into V. Non-integer scalars are loaded
// through an integer pointer of matching width (LLVM atomics operate on
// integers) and then cast back.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
                                  AtomicOpValue &X, AtomicOpValue &V,
                                  AtomicOrdering AO) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic read expected a scalar type");

  Value *XRead = nullptr;

  if (XElemTy->isIntegerTy()) {
    LoadInst *XLD =
        Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
    XLD->setAtomic(AO);
    XRead = cast<Value>(XLD);
  } else {
    // We need to bitcast and perform atomic op as integer.
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast = Builder.CreateBitCast(
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
    LoadInst *XLoad =
        Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
    XLoad->setAtomic(AO);
    if (XElemTy->isFloatingPointTy()) {
      XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
    } else {
      XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
    }
  }
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
  // The read value is stored (non-atomically) into the v expression.
  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
  return Builder.saveIP();
}

// Emit an OpenMP atomic write of Expr into X, using the same
// integer-punning strategy as createAtomicRead for non-integer scalars.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
                                   AtomicOpValue &X, Value *Expr,
                                   AtomicOrdering AO) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic write expected a scalar type");

  if (XElemTy->isIntegerTy()) {
    StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
    XSt->setAtomic(AO);
  } else {
    // We need to bitcast and perform atomic op as integers.
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
IntegerType *IntCastTy = 2489 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2490 Value *XBCast = Builder.CreateBitCast( 2491 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast"); 2492 Value *ExprCast = 2493 Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast"); 2494 StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile); 2495 XSt->setAtomic(AO); 2496 } 2497 2498 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write); 2499 return Builder.saveIP(); 2500 } 2501 2502 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( 2503 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2504 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2505 AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) { 2506 if (!updateToLocation(Loc)) 2507 return Loc.IP; 2508 2509 LLVM_DEBUG({ 2510 Type *XTy = X.Var->getType(); 2511 assert(XTy->isPointerTy() && 2512 "OMP Atomic expects a pointer to target memory"); 2513 Type *XElemTy = XTy->getPointerElementType(); 2514 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2515 XElemTy->isPointerTy()) && 2516 "OMP atomic update expected a scalar type"); 2517 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2518 (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) && 2519 "OpenMP atomic does not support LT or GT operations"); 2520 }); 2521 2522 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, 2523 IsXLHSInRHSPart); 2524 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); 2525 return Builder.saveIP(); 2526 } 2527 2528 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, 2529 AtomicRMWInst::BinOp RMWOp) { 2530 switch (RMWOp) { 2531 case AtomicRMWInst::Add: 2532 return Builder.CreateAdd(Src1, Src2); 2533 case AtomicRMWInst::Sub: 2534 return Builder.CreateSub(Src1, Src2); 2535 case AtomicRMWInst::And: 2536 return Builder.CreateAnd(Src1, Src2); 2537 case 
AtomicRMWInst::Nand: 2538 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2)); 2539 case AtomicRMWInst::Or: 2540 return Builder.CreateOr(Src1, Src2); 2541 case AtomicRMWInst::Xor: 2542 return Builder.CreateXor(Src1, Src2); 2543 case AtomicRMWInst::Xchg: 2544 case AtomicRMWInst::FAdd: 2545 case AtomicRMWInst::FSub: 2546 case AtomicRMWInst::BAD_BINOP: 2547 case AtomicRMWInst::Max: 2548 case AtomicRMWInst::Min: 2549 case AtomicRMWInst::UMax: 2550 case AtomicRMWInst::UMin: 2551 llvm_unreachable("Unsupported atomic update operation"); 2552 } 2553 llvm_unreachable("Unsupported atomic update operation"); 2554 } 2555 2556 std::pair<Value *, Value *> 2557 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr, 2558 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2559 AtomicUpdateCallbackTy &UpdateOp, 2560 bool VolatileX, bool IsXLHSInRHSPart) { 2561 Type *XElemTy = X->getType()->getPointerElementType(); 2562 2563 bool DoCmpExch = 2564 ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) || 2565 (RMWOp == AtomicRMWInst::FSub) || 2566 (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart); 2567 2568 std::pair<Value *, Value *> Res; 2569 if (XElemTy->isIntegerTy() && !DoCmpExch) { 2570 Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); 2571 // not needed except in case of postfix captures. Generate anyway for 2572 // consistency with the else part. Will be removed with any DCE pass. 
2573 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); 2574 } else { 2575 unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); 2576 IntegerType *IntCastTy = 2577 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2578 Value *XBCast = 2579 Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); 2580 LoadInst *OldVal = 2581 Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load"); 2582 OldVal->setAtomic(AO); 2583 // CurBB 2584 // | /---\ 2585 // ContBB | 2586 // | \---/ 2587 // ExitBB 2588 BasicBlock *CurBB = Builder.GetInsertBlock(); 2589 Instruction *CurBBTI = CurBB->getTerminator(); 2590 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable(); 2591 BasicBlock *ExitBB = 2592 CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit"); 2593 BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), 2594 X->getName() + ".atomic.cont"); 2595 ContBB->getTerminator()->eraseFromParent(); 2596 Builder.SetInsertPoint(ContBB); 2597 llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2); 2598 PHI->addIncoming(OldVal, CurBB); 2599 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy); 2600 NewAtomicAddr->setName(X->getName() + "x.new.val"); 2601 NewAtomicAddr->moveBefore(AllocIP); 2602 IntegerType *NewAtomicCastTy = 2603 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2604 bool IsIntTy = XElemTy->isIntegerTy(); 2605 Value *NewAtomicIntAddr = 2606 (IsIntTy) 2607 ? 
NewAtomicAddr 2608 : Builder.CreateBitCast(NewAtomicAddr, 2609 NewAtomicCastTy->getPointerTo(Addrspace)); 2610 Value *OldExprVal = PHI; 2611 if (!IsIntTy) { 2612 if (XElemTy->isFloatingPointTy()) { 2613 OldExprVal = Builder.CreateBitCast(PHI, XElemTy, 2614 X->getName() + ".atomic.fltCast"); 2615 } else { 2616 OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy, 2617 X->getName() + ".atomic.ptrCast"); 2618 } 2619 } 2620 2621 Value *Upd = UpdateOp(OldExprVal, Builder); 2622 Builder.CreateStore(Upd, NewAtomicAddr); 2623 LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr); 2624 Value *XAddr = 2625 (IsIntTy) 2626 ? X 2627 : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); 2628 AtomicOrdering Failure = 2629 llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); 2630 AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg( 2631 XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure); 2632 Result->setVolatile(VolatileX); 2633 Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0); 2634 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1); 2635 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock()); 2636 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB); 2637 2638 Res.first = OldExprVal; 2639 Res.second = Upd; 2640 2641 // set Insertion point in exit block 2642 if (UnreachableInst *ExitTI = 2643 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) { 2644 CurBBTI->eraseFromParent(); 2645 Builder.SetInsertPoint(ExitBB); 2646 } else { 2647 Builder.SetInsertPoint(ExitTI); 2648 } 2649 } 2650 2651 return Res; 2652 } 2653 2654 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( 2655 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2656 AtomicOpValue &V, Value *Expr, AtomicOrdering AO, 2657 AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, 2658 bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) { 2659 if (!updateToLocation(Loc)) 2660 return 
Loc.IP; 2661 2662 LLVM_DEBUG({ 2663 Type *XTy = X.Var->getType(); 2664 assert(XTy->isPointerTy() && 2665 "OMP Atomic expects a pointer to target memory"); 2666 Type *XElemTy = XTy->getPointerElementType(); 2667 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2668 XElemTy->isPointerTy()) && 2669 "OMP atomic capture expected a scalar type"); 2670 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2671 "OpenMP atomic does not support LT or GT operations"); 2672 }); 2673 2674 // If UpdateExpr is 'x' updated with some `expr` not based on 'x', 2675 // 'x' is simply atomically rewritten with 'expr'. 2676 AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg); 2677 std::pair<Value *, Value *> Result = 2678 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, 2679 X.IsVolatile, IsXLHSInRHSPart); 2680 2681 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second); 2682 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile); 2683 2684 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture); 2685 return Builder.saveIP(); 2686 } 2687 2688 GlobalVariable * 2689 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, 2690 std::string VarName) { 2691 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 2692 llvm::ArrayType::get( 2693 llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()), 2694 Names); 2695 auto *MapNamesArrayGlobal = new llvm::GlobalVariable( 2696 M, MapNamesArrayInit->getType(), 2697 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit, 2698 VarName); 2699 return MapNamesArrayGlobal; 2700 } 2701 2702 // Create all simple and struct types exposed by the runtime and remember 2703 // the llvm::PointerTypes of them for easy access later. 
2704 void OpenMPIRBuilder::initializeTypes(Module &M) { 2705 LLVMContext &Ctx = M.getContext(); 2706 StructType *T; 2707 #define OMP_TYPE(VarName, InitValue) VarName = InitValue; 2708 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 2709 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ 2710 VarName##PtrTy = PointerType::getUnqual(VarName##Ty); 2711 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 2712 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ 2713 VarName##Ptr = PointerType::getUnqual(VarName); 2714 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \ 2715 T = StructType::getTypeByName(Ctx, StructName); \ 2716 if (!T) \ 2717 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ 2718 VarName = T; \ 2719 VarName##Ptr = PointerType::getUnqual(T); 2720 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2721 } 2722 2723 void OpenMPIRBuilder::OutlineInfo::collectBlocks( 2724 SmallPtrSetImpl<BasicBlock *> &BlockSet, 2725 SmallVectorImpl<BasicBlock *> &BlockVector) { 2726 SmallVector<BasicBlock *, 32> Worklist; 2727 BlockSet.insert(EntryBB); 2728 BlockSet.insert(ExitBB); 2729 2730 Worklist.push_back(EntryBB); 2731 while (!Worklist.empty()) { 2732 BasicBlock *BB = Worklist.pop_back_val(); 2733 BlockVector.push_back(BB); 2734 for (BasicBlock *SuccBB : successors(BB)) 2735 if (BlockSet.insert(SuccBB).second) 2736 Worklist.push_back(SuccBB); 2737 } 2738 } 2739 2740 void CanonicalLoopInfo::collectControlBlocks( 2741 SmallVectorImpl<BasicBlock *> &BBs) { 2742 // We only count those BBs as control block for which we do not need to 2743 // reverse the CFG, i.e. not the loop body which can contain arbitrary control 2744 // flow. For consistency, this also means we do not add the Body block, which 2745 // is just the entry to the body code. 
2746 BBs.reserve(BBs.size() + 6); 2747 BBs.append({Preheader, Header, Cond, Latch, Exit, After}); 2748 } 2749 2750 void CanonicalLoopInfo::assertOK() const { 2751 #ifndef NDEBUG 2752 if (!IsValid) 2753 return; 2754 2755 // Verify standard control-flow we use for OpenMP loops. 2756 assert(Preheader); 2757 assert(isa<BranchInst>(Preheader->getTerminator()) && 2758 "Preheader must terminate with unconditional branch"); 2759 assert(Preheader->getSingleSuccessor() == Header && 2760 "Preheader must jump to header"); 2761 2762 assert(Header); 2763 assert(isa<BranchInst>(Header->getTerminator()) && 2764 "Header must terminate with unconditional branch"); 2765 assert(Header->getSingleSuccessor() == Cond && 2766 "Header must jump to exiting block"); 2767 2768 assert(Cond); 2769 assert(Cond->getSinglePredecessor() == Header && 2770 "Exiting block only reachable from header"); 2771 2772 assert(isa<BranchInst>(Cond->getTerminator()) && 2773 "Exiting block must terminate with conditional branch"); 2774 assert(size(successors(Cond)) == 2 && 2775 "Exiting block must have two successors"); 2776 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body && 2777 "Exiting block's first successor jump to the body"); 2778 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit && 2779 "Exiting block's second successor must exit the loop"); 2780 2781 assert(Body); 2782 assert(Body->getSinglePredecessor() == Cond && 2783 "Body only reachable from exiting block"); 2784 assert(!isa<PHINode>(Body->front())); 2785 2786 assert(Latch); 2787 assert(isa<BranchInst>(Latch->getTerminator()) && 2788 "Latch must terminate with unconditional branch"); 2789 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header"); 2790 // TODO: To support simple redirecting of the end of the body code that has 2791 // multiple; introduce another auxiliary basic block like preheader and after. 
2792 assert(Latch->getSinglePredecessor() != nullptr); 2793 assert(!isa<PHINode>(Latch->front())); 2794 2795 assert(Exit); 2796 assert(isa<BranchInst>(Exit->getTerminator()) && 2797 "Exit block must terminate with unconditional branch"); 2798 assert(Exit->getSingleSuccessor() == After && 2799 "Exit block must jump to after block"); 2800 2801 assert(After); 2802 assert(After->getSinglePredecessor() == Exit && 2803 "After block only reachable from exit block"); 2804 assert(After->empty() || !isa<PHINode>(After->front())); 2805 2806 Instruction *IndVar = getIndVar(); 2807 assert(IndVar && "Canonical induction variable not found?"); 2808 assert(isa<IntegerType>(IndVar->getType()) && 2809 "Induction variable must be an integer"); 2810 assert(cast<PHINode>(IndVar)->getParent() == Header && 2811 "Induction variable must be a PHI in the loop header"); 2812 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader); 2813 assert( 2814 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero()); 2815 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch); 2816 2817 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1); 2818 assert(cast<Instruction>(NextIndVar)->getParent() == Latch); 2819 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add); 2820 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar); 2821 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1)) 2822 ->isOne()); 2823 2824 Value *TripCount = getTripCount(); 2825 assert(TripCount && "Loop trip count not found?"); 2826 assert(IndVar->getType() == TripCount->getType() && 2827 "Trip count and induction variable must have the same type"); 2828 2829 auto *CmpI = cast<CmpInst>(&Cond->front()); 2830 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT && 2831 "Exit condition must be a signed less-than comparison"); 2832 assert(CmpI->getOperand(0) == IndVar && 2833 "Exit condition must compare the induction variable"); 2834 
assert(CmpI->getOperand(1) == TripCount && 2835 "Exit condition must compare with the trip count"); 2836 #endif 2837 } 2838