//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the OpenMPIRBuilder class, which is used as a
/// convenient way to create LLVM instructions for OpenMP directives.
///
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

#include <sstream>

#define DEBUG_TYPE "openmp-ir-builder"

using namespace llvm;
using namespace omp;

static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));

void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();

#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the new declaration.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    Fn.setAttributes(                                                          \
        AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets));          \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}

FunctionCallee
OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  FunctionType *FnTy = nullptr;
  Function *Fn = nullptr;

  // Try to find the declaration in the module first.
  switch (FnID) {
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
  case Enum:                                                                   \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
                             IsVarArg);                                        \
    Fn = M.getFunction(Str);                                                   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  if (!Fn) {
    // Create a new declaration if we need one.
    switch (FnID) {
#define OMP_RTL(Enum, Str, ...)                                                \
  case Enum:                                                                   \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
    }

    // Add information if the runtime function takes a callback function.
    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
        LLVMContext &Ctx = Fn->getContext();
        MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the runtime function:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the runtime function are passed to the
        //    callback callee.
        Fn->addMetadata(
            LLVMContext::MD_callback,
            *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                  2, {-1, -1}, /* VarArgsArePassed */ true)}));
      }
    }

    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
    addAttributes(FnID, *Fn);

  } else {
    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
  }

  assert(Fn && "Failed to create OpenMP runtime function");

  // Cast the function to the expected type if necessary.
  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
  return {FnTy, C};
}

Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  return Fn;
}

void OpenMPIRBuilder::initialize() { initializeTypes(M); }

void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) {
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  for (OutlineInfo &OI : OutlineInfos) {
    ParallelRegionBlockSet.clear();
    Blocks.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    Function *OuterFn = OI.EntryBB->getParent();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ false,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* Suffix */ ".omp_par");

    LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);

    LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away; we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
      if (AllowExtractorSinking) {
        // Move instructions from the to-be-deleted ArtificialEntry to the
        // entry basic block of the parallel region. CodeExtractor may have
        // sunk allocas/bitcasts for values that are solely used in the
        // outlined region and do not escape.
        assert(!ArtificialEntry.empty() &&
               "Expected instructions to sink in the outlined region");
        for (BasicBlock::iterator It = ArtificialEntry.begin(),
                                  End = ArtificialEntry.end();
             It != End;) {
          Instruction &I = *It;
          It++;

          if (I.isTerminator())
            continue;

          I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
        }
      }
      OI.EntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Allow finalize to be called multiple times.
  OutlineInfos.clear();
}

Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                         IdentFlag LocFlags,
                                         unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  Value *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {
        I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
        ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
    Constant *Initializer = ConstantStruct::get(
        cast<StructType>(IdentPtr->getPointerElementType()), IdentData);

    // Look for existing encoding of the location + flags, not needed but
    // minimizes the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.getType() == IdentPtr && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          return Ident = &GV;

    auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
                                  /* isConstant = */ true,
                                  GlobalValue::PrivateLinkage, Initializer);
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    GV->setAlignment(Align(8));
    Ident = GV;
  }
  return Builder.CreatePointerCast(Ident, IdentPtr);
}

Type *OpenMPIRBuilder::getLanemaskType() {
  LLVMContext &Ctx = M.getContext();
  Triple triple(M.getTargetTriple());

  // This test is adequate until deviceRTL has finer grained lane widths
  return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  if (!SrcLocStr) {
    Constant *Initializer =
        ConstantDataArray::getString(M.getContext(), LocStr);

    // Look for existing encoding of the location, not needed but minimizes the
    // difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)
        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line,
                                                unsigned Column) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str());
}

Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}

Constant *
OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
  DILocation *DIL = Loc.DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr();
  StringRef FileName = M.getName();
  if (DIFile *DIF = DIL->getFile())
    if (Optional<StringRef> Source = DIF->getSource())
      FileName = *Source;
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  Function =
      !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn());
}

Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  return Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
      "omp_global_thread_num");
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
                               bool ForceSimpleCall, bool CheckCancelFlag) {
  if (!updateToLocation(Loc))
    return Loc.IP;
  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
                                 bool ForceSimpleCall, bool CheckCancelFlag) {
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  //            __kmpc_barrier(loc, thread_id);

  IdentFlag BarrierLocFlags;
  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
                   getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};

  // If we are in a cancellable parallel region, barriers are cancellation
  // points.
  // TODO: Check why we would force simple calls or ignore the cancel flag.
  bool UseCancelBarrier =
      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);

  Value *Result =
      Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
                                              : OMPRTL___kmpc_barrier),
                         Args);

  if (UseCancelBarrier && CheckCancelFlag)
    emitCancelationCheckImpl(Result, OMPD_parallel);

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                              Value *IfCondition,
                              omp::Directive CanceledDirective) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // LLVM utilities like blocks with terminators.
  auto *UI = Builder.CreateUnreachable();

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    llvm_unreachable("Unknown cancel kind!");
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  Value *Result = Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);

  // The actual cancel logic is shared with others, e.g., cancel_barriers.
  emitCancelationCheckImpl(Result, CanceledDirective);

  // Update the insertion point and remove the terminator we introduced.
  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();

  return Builder.saveIP();
}

void OpenMPIRBuilder::emitCancelationCheckImpl(
    Value *CancelFlag, omp::Directive CanceledDirective) {
  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
         "Unexpected cancellation!");

  // For a cancel barrier we create two new blocks.
  BasicBlock *BB = Builder.GetInsertBlock();
  BasicBlock *NonCancellationBlock;
  if (Builder.GetInsertPoint() == BB->end()) {
    // TODO: This branch will not be needed once we have moved to the
    // OpenMPIRBuilder codegen completely.
    NonCancellationBlock = BasicBlock::Create(
        BB->getContext(), BB->getName() + ".cont", BB->getParent());
  } else {
    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
    BB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(BB);
  }
  BasicBlock *CancellationBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".cncl", BB->getParent());

  // Jump to them based on the return value.
  Value *Cmp = Builder.CreateIsNull(CancelFlag);
  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
                       /* TODO weight */ nullptr, nullptr);

  // From the cancellation block we finalize all variables and go to the
  // post finalization block that is known to the FiniCB callback.
  Builder.SetInsertPoint(CancellationBlock);
  auto &FI = FinalizationStack.back();
  FI.FiniCB(Builder.saveIP());

  // The continuation block is where code generation continues.
  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}

IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
    omp::ProcBindKind ProcBind, bool IsCancellable) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadID = getOrCreateThreadID(Ident);

  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PROC_BIND_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Save the outer alloca block because the insertion iterator may get
  // invalidated and we still need this later.
  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  // Change the location to the outer alloca insertion point to create and
  // initialize the allocas we pass into the parallel region.
  Builder.restoreIP(OuterAllocaIP);
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // If there is an if condition we actually use the TIDAddr and ZeroAddr in
  // the program, otherwise we only need them for modeling purposes to get the
  // associated arguments in the outlined function. In the former case,
  // initialize the allocas properly, in the latter case, delete them later.
  if (IfCondition) {
    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  } else {
    ToBeDeleted.push_back(TIDAddr);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);

  BasicBlock *ThenBB = ThenTI->getParent();
  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  BasicBlock *PRegBodyBB =
      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB =
      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  InsertPointTy InnerAllocaIP = Builder.saveIP();

  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
  Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
  ToBeDeleted.push_back(ZeroAddrUse);

  // ThenBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
      llvm::LLVMContext &Ctx = F->getContext();
      MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_call:
      //  - The callback callee is argument number 2 (microtask).
      //  - The first two arguments of the callback callee are unknown (-1).
      //  - All variadic arguments to the __kmpc_fork_call are passed to the
      //    callback callee.
      F->addMetadata(
          llvm::LLVMContext::MD_callback,
          *llvm::MDNode::get(
              Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
                                               /* VarArgsArePassed */ true)}));
    }
  }

  OutlineInfo OI;
  OI.PostOutlineCB = [=](Function &OutlinedFn) {
    // Add some known attributes.
    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
    OutlinedFn.addFnAttr(Attribute::NoUnwind);
    OutlinedFn.addFnAttr(Attribute::NoRecurse);

    assert(OutlinedFn.arg_size() >= 2 &&
           "Expected at least tid and bounded tid as arguments");
    unsigned NumCapturedVars =
        OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
    CI->getParent()->setName("omp_parallel");
    Builder.SetInsertPoint(CI);

    // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
    Value *ForkCallArgs[] = {
        Ident, Builder.getInt32(NumCapturedVars),
        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};

    SmallVector<Value *, 16> RealArgs;
    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

    Builder.CreateCall(RTLFn, RealArgs);

    LLVM_DEBUG(dbgs() << "With fork_call placed: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");

    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());

    // Initialize the local TID stack location with the argument value.
    Builder.SetInsertPoint(PrivTID);
    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
    Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);

    // If no "if" clause was present we do not need the call created during
    // outlining, otherwise we reuse it in the serialized parallel region.
    if (!ElseTI) {
      CI->eraseFromParent();
    } else {

      // If an "if" clause was present we are now generating the serialized
      // version into the "else" branch.
      Builder.SetInsertPoint(ElseTI);

      // Build calls __kmpc_serialized_parallel(&Ident, GTid);
      Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
          SerializedParallelCallArgs);

      // OutlinedFn(&gtid, &zero, CapturedStruct);
      CI->removeFromParent();
      Builder.Insert(CI);

      // __kmpc_end_serialized_parallel(&Ident, GTid);
      Value *EndArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
          EndArgs);

      LLVM_DEBUG(dbgs() << "With serialized parallel region: "
                        << *Builder.GetInsertBlock()->getParent() << "\n");
    }

    for (Instruction *I : ToBeDeleted)
      I->eraseFromParent();
  };

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function one last time to finalize values between the pre-fini
  // block and the exit block if we left the parallel region "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();

  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  OI.EntryBB = PRegEntryBB;
  OI.ExitBB = PRegExitBB;

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  // Ensure a single exit node for the outlined region by creating one.
  // We might have multiple incoming edges to the exit now due to finalizations,
  // e.g., cancel calls that cause the control flow to leave the region.
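  // The original exit block is renamed to "omp.par.outlined.exit" and added to
  // the outlined block set; the block split off from it below becomes the new
  // PRegExitBB and stays behind in the caller after extraction.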
  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  Blocks.push_back(PRegOutlinedExitBB);

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);

  auto PrivHelper = [&](Value &V) {
    if (&V == TIDAddr || &V == ZeroAddr)
      return;

    SetVector<Use *> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.insert(&U);

    // __kmpc_fork_call expects extra arguments as pointers. If the input
    // already has a pointer type, everything is fine. Otherwise, store the
    // value onto stack and load it back inside the to-be-outlined region. This
    // will ensure only the pointer will be passed to the function.
    // FIXME: if there are more than 15 trailing arguments, they must be
    // additionally packed in a struct.
    Value *Inner = &V;
    if (!V.getType()->isPointerTy()) {
      IRBuilder<>::InsertPointGuard Guard(Builder);
      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");

      Builder.restoreIP(OuterAllocaIP);
      Value *Ptr =
          Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");

      // Store to stack at end of the block that currently branches to the
      // entry block of the to-be-outlined region.
      Builder.SetInsertPoint(InsertBB,
                             InsertBB->getTerminator()->getIterator());
      Builder.CreateStore(&V, Ptr);

      // Load back next to allocations in the to-be-outlined region.
      Builder.restoreIP(InnerAllocaIP);
      Inner = Builder.CreateLoad(Ptr);
    }

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  // Reset the inner alloca insertion point as it will be used for loading the
  // values wrapped into pointers before passing them into the to-be-outlined
  // region. Configure it to insert immediately after the fake use of the zero
  // address so that they are available in the generated body and so that the
  // OpenMP-related values (thread ID and zero address pointers) remain leading
  // in the argument list.
  InnerAllocaIP = IRBuilder<>::InsertPoint(
      ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());

  // Reset the outer alloca insertion point to the entry of the relevant block
  // in case it was invalidated.
  OuterAllocaIP = IRBuilder<>::InsertPoint(
      OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  LLVM_DEBUG({
    for (Value *Output : Outputs)
      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
  });
  assert(Outputs.empty() &&
         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
  LLVM_DEBUG({
    for (auto *BB : Blocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  // Register the outlined info.
  addOutlineInfo(std::move(OI));

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();

  return AfterIP;
}

void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
  // Build call void __kmpc_flush(ident_t *loc)
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr)};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
}

void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitFlush(Loc);
}

void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};

  // Ignore return result until untied tasks are supported.
  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
                     Args);
}

void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskwaitImpl(Loc);
}

void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Constant *I32Null = ConstantInt::getNullValue(Int32);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
                     Args);
}

void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
  if (!updateToLocation(Loc))
    return;
  emitTaskyieldImpl(Loc);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
    DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
    BasicBlock *PostInsertBefore, const Twine &Name) {
  Module *M = F->getParent();
  LLVMContext &Ctx = M->getContext();
  Type *IndVarTy = TripCount->getType();

  // Create the basic block structure.
  BasicBlock *Preheader =
      BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
  BasicBlock *Header =
      BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
  BasicBlock *Cond =
      BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
  BasicBlock *Body =
      BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
  BasicBlock *Latch =
      BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
  BasicBlock *Exit =
      BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
  BasicBlock *After =
      BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);

  // Use specified DebugLoc for new instructions.
  Builder.SetCurrentDebugLocation(DL);

  Builder.SetInsertPoint(Preheader);
  Builder.CreateBr(Header);

  Builder.SetInsertPoint(Header);
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  Builder.CreateBr(Cond);

  Builder.SetInsertPoint(Cond);
  Value *Cmp =
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);

  Builder.SetInsertPoint(Body);
  Builder.CreateBr(Latch);

  Builder.SetInsertPoint(Latch);
  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
                                  "omp_" + Name + ".next", /*HasNUW=*/true);
  Builder.CreateBr(Header);
  IndVarPHI->addIncoming(Next, Latch);

  Builder.SetInsertPoint(Exit);
  Builder.CreateBr(After);

  // Remember and return the canonical control flow.
  LoopInfos.emplace_front();
  CanonicalLoopInfo *CL = &LoopInfos.front();

  CL->Preheader = Preheader;
  CL->Header = Header;
  CL->Cond = Cond;
  CL->Body = Body;
  CL->Latch = Latch;
  CL->Exit = Exit;
  CL->After = After;

  CL->IsValid = true;

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

CanonicalLoopInfo *
OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
                                     LoopBodyGenCallbackTy BodyGenCB,
                                     Value *TripCount, const Twine &Name) {
  BasicBlock *BB = Loc.IP.getBlock();
  BasicBlock *NextBB = BB->getNextNode();

  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
                                             NextBB, NextBB, Name);
  BasicBlock *After = CL->getAfter();

  // If location is not set, don't connect the loop.
  if (updateToLocation(Loc)) {
    // Split the loop at the insertion point: Branch to the preheader and move
    // every following instruction to after the loop (the After BB). Also, the
    // new successor is the loop's after block.
    Builder.CreateBr(CL->Preheader);
    After->getInstList().splice(After->begin(), BB->getInstList(),
                                Builder.GetInsertPoint(), BB->end());
    After->replaceSuccessorsPhiUsesWith(BB, After);
  }

  // Emit the body content. We do it after connecting the loop to the CFG so
  // that the callback does not encounter degenerate BBs.
  BodyGenCB(CL->getBodyIP(), CL->getIndVar());

#ifndef NDEBUG
  CL->assertOK();
#endif
  return CL;
}

CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
    const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    InsertPointTy ComputeIP, const Twine &Name) {

  // Consider the following difficulties (assuming 8-bit signed integers):
  //  * Adding \p Step to the loop counter which passes \p Stop may overflow:
  //      DO I = 1, 100, 50
  //  * A \p Step of INT_MIN cannot be normalized to a positive direction:
  //      DO I = 100, 0, -128

  // Start, Stop and Step must be of the same integer type.
  auto *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");

  LocationDescription ComputeLoc =
      ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
  updateToLocation(ComputeLoc);

  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  ConstantInt *One = ConstantInt::get(IndVarTy, 1);

  // Like Step, but always positive.
  Value *Incr = Step;

  // Distance between Start and Stop; always positive.
  Value *Span;

  // Condition whether there are no iterations to execute at all, e.g. because
  // UB < LB.
  Value *ZeroCmp;

  if (IsSigned) {
    // Ensure that increment is positive. If not, negate and invert LB and UB.
    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
    Span = Builder.CreateSub(UB, LB, "", false, true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  } else {
    Span = Builder.CreateSub(Stop, Start, "", true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  }

  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // Avoid incrementing past stop since it could overflow.
    Value *CountIfTwo = Builder.CreateAdd(
        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
    Value *OneCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }
  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                                          "omp_" + Name + ".tripcount");

  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
    Builder.restoreIP(CodeGenIP);
    Value *Span = Builder.CreateMul(IV, Step);
    Value *IndVar = Builder.CreateAdd(Span, Start);
    BodyGenCB(Builder.saveIP(), IndVar);
  };
  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}

// Returns an LLVM function to call for initializing loop bounds using OpenMP
// static scheduling depending on `type`. Only i32 and i64 are supported by the
// runtime. Always interpret integers as unsigned similarly to
// CanonicalLoopInfo.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
                                                  OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

// Sets the number of loop iterations to the given value. This value must be
// valid in the condition block (i.e., defined in the preheader) and is
// interpreted as an unsigned integer.
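// The update rewrites the trip-count operand of the comparison that
// createLoopSkeleton emitted as the first instruction of the condition block.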
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
  Instruction *CmpI = &CLI->getCond()->front();
  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  CmpI->setOperand(1, TripCount);
  CLI->assertOK();
}

CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
    InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
  // Set up the source location value for OpenMP runtime.
  if (!updateToLocation(Loc))
    return nullptr;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init" function.
  Builder.restoreIP(AllocaIP);
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical loop
  // always iterates from 0 to trip-count with step 1. Note that "init" expects
  // and produces an inclusive upper bound.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Constant *Zero = ConstantInt::get(IVTy, 0);
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  if (!Chunk)
    Chunk = One;

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  // TODO: extract scheduling type and map it to OMP constant. This is
  // currently happening in kmp.h and its ilk and needs to be moved to
  // OpenMP.td first.
  constexpr int StaticSchedType = 34;
  Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  Builder.CreateCall(StaticInit,
                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
                      PUpperBound, PStride, One, Chunk});
  Value *LowerBound = Builder.CreateLoad(PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  setCanonicalLoopTripCount(CLI, TripCount);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  // TODO: this can eventually move to CanonicalLoopInfo or to a new
  // CanonicalLoopInfoUpdater interface.
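  // The statically scheduled chunk starts at the lower bound returned by the
  // runtime, so each remaining use of the canonical IV is rewritten below to
  // IV + LowerBound.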
  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
    auto *Instr = dyn_cast<Instruction>(U.getUser());
    return !Instr ||
           (Instr->getParent() != CLI->getCond() &&
            Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(CLI->getExit(),
                         CLI->getExit()->getTerminator()->getIterator());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier)
    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                  /* CheckCancelFlag */ false);

  CLI->assertOK();
  return CLI;
}

/// Make \p Source branch to \p Target.
///
/// Handles two situations:
/// * \p Source already has an unconditional branch.
/// * \p Source is a degenerate block (no terminator because the BB is
///   the current head of the IR construction).
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  if (Instruction *Term = Source->getTerminator()) {
    auto *Br = cast<BranchInst>(Term);
    assert(!Br->isConditional() &&
           "BB's terminator must be an unconditional branch (or degenerate)");
    BasicBlock *Succ = Br->getSuccessor(0);
    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
    Br->setSuccessor(0, Target);
    return;
  }

  auto *NewBr = BranchInst::Create(Target, Source);
  NewBr->setDebugLoc(DL);
}

/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
                                      BasicBlock *NewTarget, DebugLoc DL) {
  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
    redirectTo(Pred, NewTarget, DL);
}

/// Determine which blocks in \p BBs are reachable from outside and remove the
/// ones that are not reachable from the function.
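/// A block is kept if any instruction outside the candidate set still
/// references it; the fixed-point loop below un-marks such blocks before the
/// remaining ones are deleted.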
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  while (true) {
    bool Changed = false;
    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);
        Changed = true;
      }
    }
    if (!Changed)
      break;
  }

  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}

std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Collect original trip counts and induction variables to be accessible by
  // index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs of
  // any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable within the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into the body of the
  // corresponding tile loop.
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  for (int i = 0; i < NumLoops - 1; ++i) {
    CanonicalLoopInfo *Surrounding = Loops[i];
    CanonicalLoopInfo *Nested = Loops[i + 1];

    BasicBlock *EnterBB = Surrounding->getBody();
    BasicBlock *ExitBB = Nested->getHeader();
    InbetweenCode.emplace_back(EnterBB, ExitBB);
  }

  // Compute the trip counts of the floor loops.
  Builder.SetCurrentDebugLocation(DL);
  Builder.restoreIP(OutermostLoop->getPreheaderIP());
  SmallVector<Value *, 4> FloorCount, FloorRems;
  for (int i = 0; i < NumLoops; ++i) {
    Value *TileSize = TileSizes[i];
    Value *OrigTripCount = OrigTripCounts[i];
    Type *IVType = OrigTripCount->getType();

    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);

    // 0 if the tilesize divides the tripcount, 1 otherwise.
    // 1 means we need an additional iteration for a partial tile.
    //
    // Unfortunately we cannot just use the roundup-formula
    //   (tripcount + tilesize - 1)/tilesize
    // because the summation might overflow. We do not want to introduce
    // undefined behavior when the untiled loop nest did not.
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));

    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    FloorTripCount =
        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);

    // Remember some values for later use.
    FloorCount.push_back(FloorTripCount);
    FloorRems.push_back(FloorTripRem);
  }

  // Generate the new loop nest, from the outermost to the innermost.
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  // The basic block of the surrounding loop that enters the newly generated
  // loop nest.
  BasicBlock *Enter = OutermostLoop->getPreheader();

  // The basic block of the surrounding loop where the inner code should
  // continue.
  BasicBlock *Continue = OutermostLoop->getAfter();

  // Where the next loop basic block should be inserted.
  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();

  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);

    // Setup the position where the next embedded loop connects to this loop.
    Enter = EmbeddedLoop->getBody();
    Continue = EmbeddedLoop->getLatch();
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");

  // Within the innermost floor loop, emit the code that computes the tile
  // sizes.
  Builder.SetInsertPoint(Enter->getTerminator());
  SmallVector<Value *, 4> TileCounts;
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    Value *TileSize = TileSizes[i];

    Value *FloorIsEpilogue =
        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
    Value *TileTripCount =
        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);

    TileCounts.push_back(TileTripCount);
  }

  // Create the tile loops.
  EmbeddNewLoops(TileCounts, "tile");

  // Insert the inbetween code into the body.
  BasicBlock *BodyEnter = Enter;
  BasicBlock *BodyEntered = nullptr;
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BasicBlock *EnterBB = P.first;
    BasicBlock *ExitBB = P.second;

    if (BodyEnter)
      redirectTo(BodyEnter, EnterBB, DL);
    else
      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);

    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }

  // Append the original loop nest body into the generated loop nest body.
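  // If no inbetween code was sunk above, the innermost tile loop body branches
  // straight to the original body (InnerEnter); otherwise the predecessors of
  // the last inbetween exit are redirected to it. Branches to the original
  // inner latch are then rerouted to the latch of the innermost generated loop.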
  if (BodyEnter)
    redirectTo(BodyEnter, InnerEnter, DL);
  else
    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  redirectAllPredecessorsTo(InnerLatch, Continue, DL);

  // Replace the original induction variable with an induction variable computed
  // from the tile and floor induction variables.
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
    Value *OrigIndVar = OrigIndVars[i];
    Value *Size = TileSizes[i];

    Value *Scale =
        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
    Value *Shift =
        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
    OrigIndVar->replaceAllUsesWith(Shift);
  }

  // Remove unused parts of the original loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

#ifndef NDEBUG
  for (CanonicalLoopInfo *GenL : Result)
    GenL->assertOK();
#endif
  return Result;
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
                                   llvm::Value *CpyFn, llvm::Value *DidIt) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);

  llvm::Value *DidItLD = Builder.CreateLoad(DidIt);

  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
  Builder.CreateCall(Fn, Args);

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  // If needed (i.e. not null), initialize `DidIt` with 0
  if (DidIt) {
    Builder.CreateStore(Builder.getInt32(0), DidIt);
  }

  Directive OMPD = Directive::OMPD_single;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // generates the following:
  // if (__kmpc_single()) {
  //   .... single region ...
  //   __kmpc_end_single
  // }

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_critical;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};

  SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
  Function *RTFn = nullptr;
  if (HintInst) {
    // Add Hint to entry Args and create call
    EnterArgs.push_back(HintInst);
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
  } else {
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
  }
  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);

  Function *ExitRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});

  // Create inlined region's entry and body blocks, in preparation
  // for conditional creation
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // generate body
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP(), *FiniBB);

  // If we didn't emit a branch to FiniBB during body generation, it means
  // FiniBB is unreachable (e.g. while(1);). Stop generating all the
  // unreachable blocks, and remove anything we are not going to use.
  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  if (SkipEmittingRegion) {
    FiniBB->eraseFromParent();
    ExitCall->eraseFromParent();
    // Discard finalization if we have it.
    if (HasFinalize) {
      assert(!FinalizationStack.empty() &&
             "Unexpected finalization stack state!");
      FinalizationStack.pop_back();
    }
  } else {
    // emit exit call and do any needed finalization.
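    // emitCommonDirectiveExit runs the pending finalization callback (when
    // HasFinalize is set) and re-inserts ExitCall into FiniBB, which is then
    // merged into its unique predecessor.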
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});

  // Create the inlined region's entry and body blocks, in preparation for
  // conditional creation.
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // Generate the body.
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP(), *FiniBB);

  // If we didn't emit a branch to FiniBB during body generation, it means
  // FiniBB is unreachable (e.g. while(1);). Stop generating all the
  // unreachable blocks, and remove anything we are not going to use.
  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  if (SkipEmittingRegion) {
    FiniBB->eraseFromParent();
    ExitCall->eraseFromParent();
    // Discard finalization if we have it.
    if (HasFinalize) {
      assert(!FinalizationStack.empty() &&
             "Unexpected finalization stack state!");
      FinalizationStack.pop_back();
    }
  } else {
    // Emit the exit call and do any needed finalization.
    auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
    assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
           FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
           "Unexpected control flow graph state!!");
    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
    assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
           "Unexpected Control Flow State!");
    MergeBlockIntoPredecessor(FiniBB);
  }

  // If we are skipping the region of a non-conditional directive, remove the
  // exit block, and clear the builder's insertion point.
  assert(SplitPos->getParent() == ExitBB &&
         "Unexpected Insertion point location!");
  if (!Conditional && SkipEmittingRegion) {
    ExitBB->eraseFromParent();
    Builder.ClearInsertionPoint();
  } else {
    auto merged = MergeBlockIntoPredecessor(ExitBB);
    BasicBlock *ExitPredBB = SplitPos->getParent();
    auto InsertBB = merged ? ExitPredBB : ExitBB;
    if (!isa_and_nonnull<BranchInst>(SplitPos))
      SplitPos->eraseFromParent();
    Builder.SetInsertPoint(InsertBB);
  }

  return Builder.saveIP();
}
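
// For illustration only (an assumed shape, not emitted verbatim): with
// Conditional == true and __kmpc_single as the entry call, the conditional
// entry produced below looks roughly like:
//
//   %res = call i32 @__kmpc_single(%ident, %tid)
//   %cond = icmp ne i32 %res, 0
//   br i1 %cond, label %omp_region.body, label %omp_region.end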
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {

  // If there is nothing to do, return the current insertion point.
  if (!Conditional)
    return Builder.saveIP();

  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);

  // Emit ThenBB and set the Builder's insertion point there for body
  // generation next. Place the block after the current block.
  Function *CurFn = EntryBB->getParent();
  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);

  // Move the entry branch to the end of ThenBB, and replace it with a
  // conditional branch (if-stmt).
  Instruction *EntryBBTI = EntryBB->getTerminator();
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  EntryBBTI->removeFromParent();
  Builder.SetInsertPoint(UI);
  Builder.Insert(EntryBBTI);
  UI->eraseFromParent();
  Builder.SetInsertPoint(ThenBB->getTerminator());

  // Return an insertion point to ExitBB.
  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
    bool HasFinalize) {

  Builder.restoreIP(FinIP);

  // If there is finalization to do, emit it before the exit call.
  if (HasFinalize) {
    assert(!FinalizationStack.empty() &&
           "Unexpected finalization stack state!");

    FinalizationInfo Fi = FinalizationStack.pop_back_val();
    assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");

    Fi.FiniCB(FinIP);

    BasicBlock *FiniBB = FinIP.getBlock();
    Instruction *FiniBBTI = FiniBB->getTerminator();

    // Set the Builder IP for call creation.
    Builder.SetInsertPoint(FiniBBTI);
  }

  // Place the exit call as the last instruction before the finalization block
  // terminator.
  ExitCall->removeFromParent();
  Builder.Insert(ExitCall);

  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
                                  ExitCall->getIterator());
}
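
// Hypothetical usage sketch (variable names and surrounding setup are
// assumptions, not part of this file): the caller emits the actual copy at
// the returned insertion point, which sits inside "copyin.not.master" just
// before its branch when BranchtoEnd is true:
//
//   InsertPointTy IP = OMPBuilder.createCopyinClauseBlocks(
//       Builder.saveIP(), MasterAddr, PrivateAddr, Int64Ty,
//       /*BranchtoEnd=*/true);
//   Builder.restoreIP(IP);
//   // ... emit the copy from *MasterAddr to *PrivateAddr here ...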
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
    InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
    llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  if (!IP.isSet())
    return IP;

  IRBuilder<>::InsertPointGuard IPG(Builder);

  // Creates the following CFG structure:
  //   OMP_Entry : (MasterAddr != PrivateAddr)?
  //       F     T
  //       |      \
  //       |     copyin.not.master
  //       |      /
  //       v     /
  //   copyin.not.master.end
  //             |
  //             v
  //   OMP.Entry.Next

  BasicBlock *OMP_Entry = IP.getBlock();
  Function *CurFn = OMP_Entry->getParent();
  BasicBlock *CopyBegin =
      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  BasicBlock *CopyEnd = nullptr;

  // If the entry block is terminated, split it to preserve the branch to the
  // following basic block (i.e. OMP.Entry.Next); otherwise, leave everything
  // as is.
  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
    CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
                                         "copyin.not.master.end");
    OMP_Entry->getTerminator()->eraseFromParent();
  } else {
    CopyEnd =
        BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
  }

  Builder.SetInsertPoint(OMP_Entry);
  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);

  Builder.SetInsertPoint(CopyBegin);
  if (BranchtoEnd)
    Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));

  return Builder.saveIP();
}

CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
                                          Value *Size, Value *Allocator,
                                          std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Size, Allocator};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);

  return Builder.CreateCall(Fn, Args, Name);
}

CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
                                         Value *Addr, Value *Allocator,
                                         std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Addr, Allocator};
  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
  return Builder.CreateCall(Fn, Args, Name);
}

CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
    const LocationDescription &Loc, llvm::Value *Pointer,
    llvm::ConstantInt *Size, const llvm::Twine &Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Constant *ThreadPrivateCache =
      getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};

  Function *Fn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);

  return Builder.CreateCall(Fn, Args);
}
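
// For example (illustrative only), getNameWithSeparators({"p", "var"}, ".", ".")
// yields ".p.var"; getOMPCriticalRegionLock below relies on this to build
// lock names of the form ".gomp_critical_user_<critical name>.var".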
std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
                                                   StringRef FirstSeparator,
                                                   StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // TODO: Replace the Twine arg with a StringRef to get rid of the conversion
  // logic. However, this is taken from the current implementation in clang
  // as-is. Since this method is used in many places exclusively for OMP
  // internal use, we will keep it as-is temporarily until we move all users
  // to the builder and then, if possible, fix it everywhere in one go.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
  } else {
    // TODO: investigate the appropriate linkage type used for the global
    // variable for possibly changing that to internal or private, or maybe
    // create different versions of the function for different OMP internal
    // variables.
    Elem.second = new llvm::GlobalVariable(
        M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
        llvm::Constant::getNullValue(Ty), Elem.first(),
        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
        AddressSpace);
  }

  return Elem.second;
}

Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
}

// Create all simple and struct types exposed by the runtime and remember
// their llvm::PointerTypes for easy access later.
void OpenMPIRBuilder::initializeTypes(Module &M) {
  LLVMContext &Ctx = M.getContext();
  StructType *T;
#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                            \
  VarName##Ty = ArrayType::get(ElemTy, ArraySize);                            \
  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                 \
  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);           \
  VarName##Ptr = PointerType::getUnqual(VarName);
#define OMP_STRUCT_TYPE(VarName, StructName, ...)                             \
  T = StructType::getTypeByName(Ctx, StructName);                             \
  if (!T)                                                                     \
    T = StructType::create(Ctx, {__VA_ARGS__}, StructName);                   \
  VarName = T;                                                                \
  VarName##Ptr = PointerType::getUnqual(T);
#include "llvm/Frontend/OpenMP/OMPKinds.def"
}

void OpenMPIRBuilder::OutlineInfo::collectBlocks(
    SmallPtrSetImpl<BasicBlock *> &BlockSet,
    SmallVectorImpl<BasicBlock *> &BlockVector) {
  SmallVector<BasicBlock *, 32> Worklist;
  BlockSet.insert(EntryBB);
  BlockSet.insert(ExitBB);

  Worklist.push_back(EntryBB);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    BlockVector.push_back(BB);
    for (BasicBlock *SuccBB : successors(BB))
      if (BlockSet.insert(SuccBB).second)
        Worklist.push_back(SuccBB);
  }
}
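
// A sketch of the canonical loop skeleton that CanonicalLoopInfo describes
// (arbitrary user control flow lives between Body and Latch; see assertOK
// below for the exact invariants):
//
//   Preheader --> Header --> Cond --(trip count reached)--> Exit --> After
//                   ^          |
//                   |          v
//                 Latch <--- Body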
void CanonicalLoopInfo::collectControlBlocks(
    SmallVectorImpl<BasicBlock *> &BBs) {
  // We only count those BBs as control blocks for which we do not need to
  // reverse the CFG, i.e. not the loop body which can contain arbitrary
  // control flow. For consistency, this also means we do not add the Body
  // block, which is just the entry to the body code.
  BBs.reserve(BBs.size() + 6);
  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
}
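
// Illustrative sketch (block and value names, and the i32 type, are
// assumptions) of the header, exiting-block, and latch IR that assertOK
// expects for a loop produced by this builder:
//
//   header:
//     %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
//     br label %cond
//   cond:
//     %cmp = icmp ult i32 %iv, %tripcount
//     br i1 %cmp, label %body, label %exit
//   latch:
//     %iv.next = add i32 %iv, 1
//     br label %header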
void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
  if (!IsValid)
    return;

  // Verify standard control-flow we use for OpenMP loops.
  assert(Preheader);
  assert(isa<BranchInst>(Preheader->getTerminator()) &&
         "Preheader must terminate with unconditional branch");
  assert(Preheader->getSingleSuccessor() == Header &&
         "Preheader must jump to header");

  assert(Header);
  assert(isa<BranchInst>(Header->getTerminator()) &&
         "Header must terminate with unconditional branch");
  assert(Header->getSingleSuccessor() == Cond &&
         "Header must jump to exiting block");

  assert(Cond);
  assert(Cond->getSinglePredecessor() == Header &&
         "Exiting block only reachable from header");

  assert(isa<BranchInst>(Cond->getTerminator()) &&
         "Exiting block must terminate with conditional branch");
  assert(size(successors(Cond)) == 2 &&
         "Exiting block must have two successors");
  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
         "Exiting block's first successor must jump to the body");
  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
         "Exiting block's second successor must exit the loop");

  assert(Body);
  assert(Body->getSinglePredecessor() == Cond &&
         "Body only reachable from exiting block");
  assert(!isa<PHINode>(Body->front()));

  assert(Latch);
  assert(isa<BranchInst>(Latch->getTerminator()) &&
         "Latch must terminate with unconditional branch");
  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
  // TODO: To support simple redirecting of the end of the body code that has
  // multiple exits, introduce another auxiliary basic block like preheader
  // and after.
  assert(Latch->getSinglePredecessor() != nullptr);
  assert(!isa<PHINode>(Latch->front()));

  assert(Exit);
  assert(isa<BranchInst>(Exit->getTerminator()) &&
         "Exit block must terminate with unconditional branch");
  assert(Exit->getSingleSuccessor() == After &&
         "Exit block must jump to after block");

  assert(After);
  assert(After->getSinglePredecessor() == Exit &&
         "After block only reachable from exit block");
  assert(After->empty() || !isa<PHINode>(After->front()));

  Instruction *IndVar = getIndVar();
  assert(IndVar && "Canonical induction variable not found?");
  assert(isa<IntegerType>(IndVar->getType()) &&
         "Induction variable must be an integer");
  assert(cast<PHINode>(IndVar)->getParent() == Header &&
         "Induction variable must be a PHI in the loop header");
  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
  assert(
      cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);

  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
             ->isOne());

  Value *TripCount = getTripCount();
  assert(TripCount && "Loop trip count not found?");
  assert(IndVar->getType() == TripCount->getType() &&
         "Trip count and induction variable must have the same type");

  auto *CmpI = cast<CmpInst>(&Cond->front());
  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
         "Exit condition must be an unsigned less-than comparison");
  assert(CmpI->getOperand(0) == IndVar &&
         "Exit condition must compare the induction variable");
  assert(CmpI->getOperand(1) == TripCount &&
         "Exit condition must compare with the trip count");
#endif
}