1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the OpenMPIRBuilder class, which is used as a 11 /// convenient way to create LLVM instructions for OpenMP directives. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 16 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/IR/CFG.h" 20 #include "llvm/IR/DebugInfo.h" 21 #include "llvm/IR/MDBuilder.h" 22 #include "llvm/IR/IRBuilder.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 26 #include "llvm/Transforms/Utils/CodeExtractor.h" 27 28 #include <sstream> 29 30 #define DEBUG_TYPE "openmp-ir-builder" 31 32 using namespace llvm; 33 using namespace omp; 34 using namespace types; 35 36 static cl::opt<bool> 37 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, 38 cl::desc("Use optimistic attributes describing " 39 "'as-if' properties of runtime calls."), 40 cl::init(false)); 41 42 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { 43 LLVMContext &Ctx = Fn.getContext(); 44 45 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet; 46 #include "llvm/Frontend/OpenMP/OMPKinds.def" 47 48 // Add attributes to the new declaration. 49 switch (FnID) { 50 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \ 51 case Enum: \ 52 Fn.setAttributes( \ 53 AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \ 54 break; 55 #include "llvm/Frontend/OpenMP/OMPKinds.def" 56 default: 57 // Attributes are optional. 58 break; 59 } 60 } 61 62 Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { 63 Function *Fn = nullptr; 64 65 // Try to find the declation in the module first. 66 switch (FnID) { 67 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ 68 case Enum: \ 69 Fn = M.getFunction(Str); \ 70 break; 71 #include "llvm/Frontend/OpenMP/OMPKinds.def" 72 } 73 74 if (!Fn) { 75 // Create a new declaration if we need one. 76 switch (FnID) { 77 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ 78 case Enum: \ 79 Fn = Function::Create(FunctionType::get(ReturnType, \ 80 ArrayRef<Type *>{__VA_ARGS__}, \ 81 IsVarArg), \ 82 GlobalValue::ExternalLinkage, Str, M); \ 83 break; 84 #include "llvm/Frontend/OpenMP/OMPKinds.def" 85 } 86 87 addAttributes(FnID, *Fn); 88 } 89 90 assert(Fn && "Failed to create OpenMP runtime function"); 91 return Fn; 92 } 93 94 void OpenMPIRBuilder::initialize() { initializeTypes(M); } 95 96 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, 97 IdentFlag LocFlags) { 98 // Enable "C-mode". 99 LocFlags |= OMP_IDENT_FLAG_KMPC; 100 101 GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}]; 102 if (!DefaultIdent) { 103 Constant *I32Null = ConstantInt::getNullValue(Int32); 104 Constant *IdentData[] = {I32Null, 105 ConstantInt::get(Int32, uint64_t(LocFlags)), 106 I32Null, I32Null, SrcLocStr}; 107 Constant *Initializer = ConstantStruct::get( 108 cast<StructType>(IdentPtr->getPointerElementType()), IdentData); 109 110 // Look for existing encoding of the location + flags, not needed but 111 // minimizes the difference to the existing solution while we transition. 112 for (GlobalVariable &GV : M.getGlobalList()) 113 if (GV.getType() == IdentPtr && GV.hasInitializer()) 114 if (GV.getInitializer() == Initializer) 115 return DefaultIdent = &GV; 116 117 DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(), 118 /* isConstant = */ false, 119 GlobalValue::PrivateLinkage, Initializer); 120 DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 121 DefaultIdent->setAlignment(Align(8)); 122 } 123 return DefaultIdent; 124 } 125 126 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { 127 Constant *&SrcLocStr = SrcLocStrMap[LocStr]; 128 if (!SrcLocStr) { 129 Constant *Initializer = 130 ConstantDataArray::getString(M.getContext(), LocStr); 131 132 // Look for existing encoding of the location, not needed but minimizes the 133 // difference to the existing solution while we transition. 134 for (GlobalVariable &GV : M.getGlobalList()) 135 if (GV.isConstant() && GV.hasInitializer() && 136 GV.getInitializer() == Initializer) 137 return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr); 138 139 SrcLocStr = Builder.CreateGlobalStringPtr(LocStr); 140 } 141 return SrcLocStr; 142 } 143 144 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { 145 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); 146 } 147 148 Constant * 149 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { 150 DILocation *DIL = Loc.DL.get(); 151 if (!DIL) 152 return getOrCreateDefaultSrcLocStr(); 153 StringRef Filename = 154 !DIL->getFilename().empty() ? DIL->getFilename() : M.getName(); 155 StringRef Function = DIL->getScope()->getSubprogram()->getName(); 156 Function = 157 !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName(); 158 std::string LineStr = std::to_string(DIL->getLine()); 159 std::string ColumnStr = std::to_string(DIL->getColumn()); 160 std::stringstream SrcLocStr; 161 SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";" 162 << LineStr << ";" << ColumnStr << ";;"; 163 return getOrCreateSrcLocStr(SrcLocStr.str()); 164 } 165 166 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { 167 return Builder.CreateCall( 168 getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident, 169 "omp_global_thread_num"); 170 } 171 172 OpenMPIRBuilder::InsertPointTy 173 OpenMPIRBuilder::CreateBarrier(const LocationDescription &Loc, Directive DK, 174 bool ForceSimpleCall, bool CheckCancelFlag) { 175 if (!updateToLocation(Loc)) 176 return Loc.IP; 177 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag); 178 } 179 180 OpenMPIRBuilder::InsertPointTy 181 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, 182 bool ForceSimpleCall, bool CheckCancelFlag) { 183 // Build call __kmpc_cancel_barrier(loc, thread_id) or 184 // __kmpc_barrier(loc, thread_id); 185 186 IdentFlag BarrierLocFlags; 187 switch (Kind) { 188 case OMPD_for: 189 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR; 190 break; 191 case OMPD_sections: 192 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS; 193 break; 194 case OMPD_single: 195 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE; 196 break; 197 case OMPD_barrier: 198 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL; 199 break; 200 default: 201 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL; 202 break; 203 } 204 205 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 206 Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), 207 getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; 208 209 // If we are in a cancellable parallel region, barriers are cancellation 210 // points. 211 // TODO: Check why we would force simple calls or to ignore the cancel flag. 212 bool UseCancelBarrier = 213 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); 214 215 Value *Result = Builder.CreateCall( 216 getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier 217 : OMPRTL___kmpc_barrier), 218 Args); 219 220 if (UseCancelBarrier && CheckCancelFlag) 221 emitCancelationCheckImpl(Result, OMPD_parallel); 222 223 return Builder.saveIP(); 224 } 225 226 OpenMPIRBuilder::InsertPointTy 227 OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc, 228 Value *IfCondition, 229 omp::Directive CanceledDirective) { 230 if (!updateToLocation(Loc)) 231 return Loc.IP; 232 233 // LLVM utilities like blocks with terminators. 234 auto *UI = Builder.CreateUnreachable(); 235 236 Instruction *ThenTI = UI, *ElseTI = nullptr; 237 if (IfCondition) 238 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 239 Builder.SetInsertPoint(ThenTI); 240 241 Value *CancelKind = nullptr; 242 switch (CanceledDirective) { 243 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ 244 case DirectiveEnum: \ 245 CancelKind = Builder.getInt32(Value); \ 246 break; 247 #include "llvm/Frontend/OpenMP/OMPKinds.def" 248 default: 249 llvm_unreachable("Unknown cancel kind!"); 250 } 251 252 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 253 Value *Ident = getOrCreateIdent(SrcLocStr); 254 Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; 255 Value *Result = Builder.CreateCall( 256 getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args); 257 258 // The actual cancel logic is shared with others, e.g., cancel_barriers. 259 emitCancelationCheckImpl(Result, CanceledDirective); 260 261 // Update the insertion point and remove the terminator we introduced. 262 Builder.SetInsertPoint(UI->getParent()); 263 UI->eraseFromParent(); 264 265 return Builder.saveIP(); 266 } 267 268 void OpenMPIRBuilder::emitCancelationCheckImpl( 269 Value *CancelFlag, omp::Directive CanceledDirective) { 270 assert(isLastFinalizationInfoCancellable(CanceledDirective) && 271 "Unexpected cancellation!"); 272 273 // For a cancel barrier we create two new blocks. 274 BasicBlock *BB = Builder.GetInsertBlock(); 275 BasicBlock *NonCancellationBlock; 276 if (Builder.GetInsertPoint() == BB->end()) { 277 // TODO: This branch will not be needed once we moved to the 278 // OpenMPIRBuilder codegen completely. 279 NonCancellationBlock = BasicBlock::Create( 280 BB->getContext(), BB->getName() + ".cont", BB->getParent()); 281 } else { 282 NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); 283 BB->getTerminator()->eraseFromParent(); 284 Builder.SetInsertPoint(BB); 285 } 286 BasicBlock *CancellationBlock = BasicBlock::Create( 287 BB->getContext(), BB->getName() + ".cncl", BB->getParent()); 288 289 // Jump to them based on the return value. 290 Value *Cmp = Builder.CreateIsNull(CancelFlag); 291 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, 292 /* TODO weight */ nullptr, nullptr); 293 294 // From the cancellation block we finalize all variables and go to the 295 // post finalization block that is known to the FiniCB callback. 296 Builder.SetInsertPoint(CancellationBlock); 297 auto &FI = FinalizationStack.back(); 298 FI.FiniCB(Builder.saveIP()); 299 300 // The continuation block is where code generation continues. 301 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); 302 } 303 304 IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( 305 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, 306 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, 307 Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { 308 if (!updateToLocation(Loc)) 309 return Loc.IP; 310 311 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 312 Value *Ident = getOrCreateIdent(SrcLocStr); 313 Value *ThreadID = getOrCreateThreadID(Ident); 314 315 if (NumThreads) { 316 // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) 317 Value *Args[] = { 318 Ident, ThreadID, 319 Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; 320 Builder.CreateCall( 321 getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args); 322 } 323 324 if (ProcBind != OMP_PROC_BIND_default) { 325 // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind) 326 Value *Args[] = { 327 Ident, ThreadID, 328 ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; 329 Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind), 330 Args); 331 } 332 333 BasicBlock *InsertBB = Builder.GetInsertBlock(); 334 Function *OuterFn = InsertBB->getParent(); 335 336 // Vector to remember instructions we used only during the modeling but which 337 // we want to delete at the end. 338 SmallVector<Instruction *, 4> ToBeDeleted; 339 340 Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); 341 AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); 342 AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); 343 344 // If there is an if condition we actually use the TIDAddr and ZeroAddr in the 345 // program, otherwise we only need them for modeling purposes to get the 346 // associated arguments in the outlined function. In the former case, 347 // initialize the allocas properly, in the latter case, delete them later. 348 if (IfCondition) { 349 Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); 350 Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); 351 } else { 352 ToBeDeleted.push_back(TIDAddr); 353 ToBeDeleted.push_back(ZeroAddr); 354 } 355 356 // Create an artificial insertion point that will also ensure the blocks we 357 // are about to split are not degenerated. 358 auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); 359 360 Instruction *ThenTI = UI, *ElseTI = nullptr; 361 if (IfCondition) 362 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 363 364 BasicBlock *ThenBB = ThenTI->getParent(); 365 BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); 366 BasicBlock *PRegBodyBB = 367 PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); 368 BasicBlock *PRegPreFiniBB = 369 PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize"); 370 BasicBlock *PRegExitBB = 371 PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); 372 373 auto FiniCBWrapper = [&](InsertPointTy IP) { 374 // Hide "open-ended" blocks from the given FiniCB by setting the right jump 375 // target to the region exit block. 376 if (IP.getBlock()->end() == IP.getPoint()) { 377 IRBuilder<>::InsertPointGuard IPG(Builder); 378 Builder.restoreIP(IP); 379 Instruction *I = Builder.CreateBr(PRegExitBB); 380 IP = InsertPointTy(I->getParent(), I->getIterator()); 381 } 382 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && 383 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && 384 "Unexpected insertion point for finalization call!"); 385 return FiniCB(IP); 386 }; 387 388 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); 389 390 // Generate the privatization allocas in the block that will become the entry 391 // of the outlined function. 392 InsertPointTy AllocaIP(PRegEntryBB, 393 PRegEntryBB->getTerminator()->getIterator()); 394 Builder.restoreIP(AllocaIP); 395 AllocaInst *PrivTIDAddr = 396 Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); 397 Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); 398 399 // Add some fake uses for OpenMP provided arguments. 400 ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); 401 ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use")); 402 403 // ThenBB 404 // | 405 // V 406 // PRegionEntryBB <- Privatization allocas are placed here. 407 // | 408 // V 409 // PRegionBodyBB <- BodeGen is invoked here. 410 // | 411 // V 412 // PRegPreFiniBB <- The block we will start finalization from. 413 // | 414 // V 415 // PRegionExitBB <- A common exit to simplify block collection. 416 // 417 418 LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); 419 420 // Let the caller create the body. 421 assert(BodyGenCB && "Expected body generation callback!"); 422 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); 423 BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); 424 425 LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); 426 427 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 428 SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist; 429 ParallelRegionBlockSet.insert(PRegEntryBB); 430 ParallelRegionBlockSet.insert(PRegExitBB); 431 432 // Collect all blocks in-between PRegEntryBB and PRegExitBB. 433 Worklist.push_back(PRegEntryBB); 434 while (!Worklist.empty()) { 435 BasicBlock *BB = Worklist.pop_back_val(); 436 ParallelRegionBlocks.push_back(BB); 437 for (BasicBlock *SuccBB : successors(BB)) 438 if (ParallelRegionBlockSet.insert(SuccBB).second) 439 Worklist.push_back(SuccBB); 440 } 441 442 CodeExtractorAnalysisCache CEAC(*OuterFn); 443 CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, 444 /* AggregateArgs */ false, 445 /* BlockFrequencyInfo */ nullptr, 446 /* BranchProbabilityInfo */ nullptr, 447 /* AssumptionCache */ nullptr, 448 /* AllowVarArgs */ true, 449 /* AllowAlloca */ true, 450 /* Suffix */ ".omp_par"); 451 452 // Find inputs to, outputs from the code region. 453 BasicBlock *CommonExit = nullptr; 454 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands; 455 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); 456 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); 457 458 LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); 459 460 FunctionCallee TIDRTLFn = 461 getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); 462 463 auto PrivHelper = [&](Value &V) { 464 if (&V == TIDAddr || &V == ZeroAddr) 465 return; 466 467 SmallVector<Use *, 8> Uses; 468 for (Use &U : V.uses()) 469 if (auto *UserI = dyn_cast<Instruction>(U.getUser())) 470 if (ParallelRegionBlockSet.count(UserI->getParent())) 471 Uses.push_back(&U); 472 473 Value *ReplacementValue = nullptr; 474 CallInst *CI = dyn_cast<CallInst>(&V); 475 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) { 476 ReplacementValue = PrivTID; 477 } else { 478 Builder.restoreIP( 479 PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue)); 480 assert(ReplacementValue && 481 "Expected copy/create callback to set replacement value!"); 482 if (ReplacementValue == &V) 483 return; 484 } 485 486 for (Use *UPtr : Uses) 487 UPtr->set(ReplacementValue); 488 }; 489 490 for (Value *Input : Inputs) { 491 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); 492 PrivHelper(*Input); 493 } 494 for (Value *Output : Outputs) { 495 LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); 496 PrivHelper(*Output); 497 } 498 499 LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n"); 500 LLVM_DEBUG({ 501 for (auto *BB : ParallelRegionBlocks) 502 dbgs() << " PBR: " << BB->getName() << "\n"; 503 }); 504 505 // Add some known attributes to the outlined function. 506 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); 507 OutlinedFn->addParamAttr(0, Attribute::NoAlias); 508 OutlinedFn->addParamAttr(1, Attribute::NoAlias); 509 OutlinedFn->addFnAttr(Attribute::NoUnwind); 510 OutlinedFn->addFnAttr(Attribute::NoRecurse); 511 512 LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); 513 LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); 514 515 // For compability with the clang CG we move the outlined function after the 516 // one with the parallel region. 517 OutlinedFn->removeFromParent(); 518 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); 519 520 // Remove the artificial entry introduced by the extractor right away, we 521 // made our own entry block after all. 522 { 523 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); 524 assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); 525 assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); 526 PRegEntryBB->moveBefore(&ArtificialEntry); 527 ArtificialEntry.eraseFromParent(); 528 } 529 LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); 530 assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); 531 532 assert(OutlinedFn && OutlinedFn->getNumUses() == 1); 533 assert(OutlinedFn->arg_size() >= 2 && 534 "Expected at least tid and bounded tid as arguments"); 535 unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; 536 537 CallInst *CI = cast<CallInst>(OutlinedFn->user_back()); 538 CI->getParent()->setName("omp_parallel"); 539 Builder.SetInsertPoint(CI); 540 541 // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); 542 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), 543 Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; 544 545 SmallVector<Value *, 16> RealArgs; 546 RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); 547 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); 548 549 FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); 550 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 551 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 552 llvm::LLVMContext &Ctx = F->getContext(); 553 MDBuilder MDB(Ctx); 554 // Annotate the callback behavior of the __kmpc_fork_call: 555 // - The callback callee is argument number 2 (microtask). 556 // - The first two arguments of the callback callee are unknown (-1). 557 // - All variadic arguments to the __kmpc_fork_call are passed to the 558 // callback callee. 559 F->addMetadata( 560 llvm::LLVMContext::MD_callback, 561 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 562 2, {-1, -1}, 563 /* VarArgsArePassed */ true)})); 564 } 565 } 566 567 Builder.CreateCall(RTLFn, RealArgs); 568 569 LLVM_DEBUG(dbgs() << "With fork_call placed: " 570 << *Builder.GetInsertBlock()->getParent() << "\n"); 571 572 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); 573 InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); 574 UI->eraseFromParent(); 575 576 // Initialize the local TID stack location with the argument value. 577 Builder.SetInsertPoint(PrivTID); 578 Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); 579 Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); 580 581 // If no "if" clause was present we do not need the call created during 582 // outlining, otherwise we reuse it in the serialized parallel region. 583 if (!ElseTI) { 584 CI->eraseFromParent(); 585 } else { 586 587 // If an "if" clause was present we are now generating the serialized 588 // version into the "else" branch. 589 Builder.SetInsertPoint(ElseTI); 590 591 // Build calls __kmpc_serialized_parallel(&Ident, GTid); 592 Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; 593 Builder.CreateCall( 594 getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), 595 SerializedParallelCallArgs); 596 597 // OutlinedFn(>id, &zero, CapturedStruct); 598 CI->removeFromParent(); 599 Builder.Insert(CI); 600 601 // __kmpc_end_serialized_parallel(&Ident, GTid); 602 Value *EndArgs[] = {Ident, ThreadID}; 603 Builder.CreateCall( 604 getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), 605 EndArgs); 606 607 LLVM_DEBUG(dbgs() << "With serialized parallel region: " 608 << *Builder.GetInsertBlock()->getParent() << "\n"); 609 } 610 611 // Adjust the finalization stack, verify the adjustment, and call the 612 // finalize function a last time to finalize values between the pre-fini block 613 // and the exit block if we left the parallel "the normal way". 614 auto FiniInfo = FinalizationStack.pop_back_val(); 615 (void)FiniInfo; 616 assert(FiniInfo.DK == OMPD_parallel && 617 "Unexpected finalization stack state!"); 618 619 Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); 620 assert(PreFiniTI->getNumSuccessors() == 1 && 621 PreFiniTI->getSuccessor(0)->size() == 1 && 622 isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) && 623 "Unexpected CFG structure!"); 624 625 InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); 626 FiniCB(PreFiniIP); 627 628 for (Instruction *I : ToBeDeleted) 629 I->eraseFromParent(); 630 631 return AfterIP; 632 } 633