1 //===- InlineFunction.cpp - Code to perform function inlining -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements inlining of a function into a call site, resolving 10 // parameters and the return value as appropriate. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SetVector.h" 17 #include "llvm/ADT/SmallPtrSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/iterator_range.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/Analysis/AssumptionCache.h" 23 #include "llvm/Analysis/BlockFrequencyInfo.h" 24 #include "llvm/Analysis/CallGraph.h" 25 #include "llvm/Analysis/CaptureTracking.h" 26 #include "llvm/Analysis/EHPersonalities.h" 27 #include "llvm/Analysis/InstructionSimplify.h" 28 #include "llvm/Analysis/MemoryProfileInfo.h" 29 #include "llvm/Analysis/ObjCARCAnalysisUtils.h" 30 #include "llvm/Analysis/ObjCARCUtil.h" 31 #include "llvm/Analysis/ProfileSummaryInfo.h" 32 #include "llvm/Analysis/ValueTracking.h" 33 #include "llvm/Analysis/VectorUtils.h" 34 #include "llvm/IR/Argument.h" 35 #include "llvm/IR/BasicBlock.h" 36 #include "llvm/IR/CFG.h" 37 #include "llvm/IR/Constant.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/DataLayout.h" 40 #include "llvm/IR/DebugInfo.h" 41 #include "llvm/IR/DebugInfoMetadata.h" 42 #include "llvm/IR/DebugLoc.h" 43 #include "llvm/IR/DerivedTypes.h" 44 #include "llvm/IR/Dominators.h" 45 #include "llvm/IR/Function.h" 46 #include "llvm/IR/IRBuilder.h" 47 #include "llvm/IR/InlineAsm.h" 48 #include "llvm/IR/InstrTypes.h" 49 #include "llvm/IR/Instruction.h" 50 #include "llvm/IR/Instructions.h" 51 #include "llvm/IR/IntrinsicInst.h" 52 #include "llvm/IR/Intrinsics.h" 53 #include "llvm/IR/LLVMContext.h" 54 #include "llvm/IR/MDBuilder.h" 55 #include "llvm/IR/Metadata.h" 56 #include "llvm/IR/Module.h" 57 #include "llvm/IR/Type.h" 58 #include "llvm/IR/User.h" 59 #include "llvm/IR/Value.h" 60 #include "llvm/Support/Casting.h" 61 #include "llvm/Support/CommandLine.h" 62 #include "llvm/Support/ErrorHandling.h" 63 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 64 #include "llvm/Transforms/Utils/Cloning.h" 65 #include "llvm/Transforms/Utils/Local.h" 66 #include "llvm/Transforms/Utils/ValueMapper.h" 67 #include <algorithm> 68 #include <cassert> 69 #include <cstdint> 70 #include <iterator> 71 #include <limits> 72 #include <optional> 73 #include <string> 74 #include <utility> 75 #include <vector> 76 77 #define DEBUG_TYPE "inline-function" 78 79 using namespace llvm; 80 using namespace llvm::memprof; 81 using ProfileCount = Function::ProfileCount; 82 83 static cl::opt<bool> 84 EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), 85 cl::Hidden, 86 cl::desc("Convert noalias attributes to metadata during inlining.")); 87 88 static cl::opt<bool> 89 UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, 90 cl::init(true), 91 cl::desc("Use the llvm.experimental.noalias.scope.decl " 92 "intrinsic during inlining.")); 93 94 // Disabled by default, because the added alignment assumptions may increase 95 // compile-time and block 
optimizations. This option is not suitable for use 96 // with frontends that emit comprehensive parameter alignment annotations. 97 static cl::opt<bool> 98 PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", 99 cl::init(false), cl::Hidden, 100 cl::desc("Convert align attributes to assumptions during inlining.")); 101 102 static cl::opt<bool> UpdateReturnAttributes( 103 "update-return-attrs", cl::init(true), cl::Hidden, 104 cl::desc("Update return attributes on calls within inlined body")); 105 106 static cl::opt<unsigned> InlinerAttributeWindow( 107 "max-inst-checked-for-throw-during-inlining", cl::Hidden, 108 cl::desc("the maximum number of instructions analyzed for may throw during " 109 "attribute inference in inlined body"), 110 cl::init(4)); 111 112 namespace { 113 114 /// A class for recording information about inlining a landing pad. 115 class LandingPadInliningInfo { 116 /// Destination of the invoke's unwind. 117 BasicBlock *OuterResumeDest; 118 119 /// Destination for the callee's resume. 120 BasicBlock *InnerResumeDest = nullptr; 121 122 /// LandingPadInst associated with the invoke. 123 LandingPadInst *CallerLPad = nullptr; 124 125 /// PHI for EH values from landingpad insts. 126 PHINode *InnerEHValuesPHI = nullptr; 127 128 SmallVector<Value*, 8> UnwindDestPHIValues; 129 130 public: 131 LandingPadInliningInfo(InvokeInst *II) 132 : OuterResumeDest(II->getUnwindDest()) { 133 // If there are PHI nodes in the unwind destination block, we need to keep 134 // track of which values came into them from the invoke before removing 135 // the edge from this block. 136 BasicBlock *InvokeBB = II->getParent(); 137 BasicBlock::iterator I = OuterResumeDest->begin(); 138 for (; isa<PHINode>(I); ++I) { 139 // Save the value to use for this edge. 140 PHINode *PHI = cast<PHINode>(I); 141 UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); 142 } 143 144 CallerLPad = cast<LandingPadInst>(I); 145 } 146 147 /// The outer unwind destination is the target of 148 /// unwind edges introduced for calls within the inlined function. 149 BasicBlock *getOuterResumeDest() const { 150 return OuterResumeDest; 151 } 152 153 BasicBlock *getInnerResumeDest(); 154 155 LandingPadInst *getLandingPadInst() const { return CallerLPad; } 156 157 /// Forward the 'resume' instruction to the caller's landing pad block. 158 /// When the landing pad block has only one predecessor, this is 159 /// a simple branch. When there is more than one predecessor, we need to 160 /// split the landing pad block after the landingpad instruction and jump 161 /// to there. 162 void forwardResume(ResumeInst *RI, 163 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); 164 165 /// Add incoming-PHI values to the unwind destination block for the given 166 /// basic block, using the values for the original invoke's source block. 167 void addIncomingPHIValuesFor(BasicBlock *BB) const { 168 addIncomingPHIValuesForInto(BB, OuterResumeDest); 169 } 170 171 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { 172 BasicBlock::iterator I = dest->begin(); 173 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { 174 PHINode *phi = cast<PHINode>(I); 175 phi->addIncoming(UnwindDestPHIValues[i], src); 176 } 177 } 178 }; 179 180 } // end anonymous namespace 181 182 /// Get or create a target for the branch from ResumeInsts. 183 BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { 184 if (InnerResumeDest) return InnerResumeDest; 185 186 // Split the landing pad. 
187 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); 188 InnerResumeDest = 189 OuterResumeDest->splitBasicBlock(SplitPoint, 190 OuterResumeDest->getName() + ".body"); 191 192 // The number of incoming edges we expect to the inner landing pad. 193 const unsigned PHICapacity = 2; 194 195 // Create corresponding new PHIs for all the PHIs in the outer landing pad. 196 Instruction *InsertPoint = &InnerResumeDest->front(); 197 BasicBlock::iterator I = OuterResumeDest->begin(); 198 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { 199 PHINode *OuterPHI = cast<PHINode>(I); 200 PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, 201 OuterPHI->getName() + ".lpad-body", 202 InsertPoint); 203 OuterPHI->replaceAllUsesWith(InnerPHI); 204 InnerPHI->addIncoming(OuterPHI, OuterResumeDest); 205 } 206 207 // Create a PHI for the exception values. 208 InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, 209 "eh.lpad-body", InsertPoint); 210 CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); 211 InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); 212 213 // All done. 214 return InnerResumeDest; 215 } 216 217 /// Forward the 'resume' instruction to the caller's landing pad block. 218 /// When the landing pad block has only one predecessor, this is a simple 219 /// branch. When there is more than one predecessor, we need to split the 220 /// landing pad block after the landingpad instruction and jump to there. 221 void LandingPadInliningInfo::forwardResume( 222 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { 223 BasicBlock *Dest = getInnerResumeDest(); 224 BasicBlock *Src = RI->getParent(); 225 226 BranchInst::Create(Dest, Src); 227 228 // Update the PHIs in the destination. They were inserted in an order which 229 // makes this work. 230 addIncomingPHIValuesForInto(Src, Dest); 231 232 InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); 233 RI->eraseFromParent(); 234 } 235 236 /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. 237 static Value *getParentPad(Value *EHPad) { 238 if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) 239 return FPI->getParentPad(); 240 return cast<CatchSwitchInst>(EHPad)->getParentPad(); 241 } 242 243 using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; 244 245 /// Helper for getUnwindDestToken that does the descendant-ward part of 246 /// the search. 247 static Value *getUnwindDestTokenHelper(Instruction *EHPad, 248 UnwindDestMemoTy &MemoMap) { 249 SmallVector<Instruction *, 8> Worklist(1, EHPad); 250 251 while (!Worklist.empty()) { 252 Instruction *CurrentPad = Worklist.pop_back_val(); 253 // We only put pads on the worklist that aren't in the MemoMap. When 254 // we find an unwind dest for a pad we may update its ancestors, but 255 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, 256 // so they should never get updated while queued on the worklist. 257 assert(!MemoMap.count(CurrentPad)); 258 Value *UnwindDestToken = nullptr; 259 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { 260 if (CatchSwitch->hasUnwindDest()) { 261 UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); 262 } else { 263 // Catchswitch doesn't have a 'nounwind' variant, and one might be 264 // annotated as "unwinds to caller" when really it's nounwind (see 265 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the 266 // parent's unwind dest from this. 
We can check its catchpads' 267 // descendants, since they might include a cleanuppad with an 268 // "unwinds to caller" cleanupret, which can be trusted. 269 for (auto HI = CatchSwitch->handler_begin(), 270 HE = CatchSwitch->handler_end(); 271 HI != HE && !UnwindDestToken; ++HI) { 272 BasicBlock *HandlerBlock = *HI; 273 auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); 274 for (User *Child : CatchPad->users()) { 275 // Intentionally ignore invokes here -- since the catchswitch is 276 // marked "unwind to caller", it would be a verifier error if it 277 // contained an invoke which unwinds out of it, so any invoke we'd 278 // encounter must unwind to some child of the catch. 279 if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) 280 continue; 281 282 Instruction *ChildPad = cast<Instruction>(Child); 283 auto Memo = MemoMap.find(ChildPad); 284 if (Memo == MemoMap.end()) { 285 // Haven't figured out this child pad yet; queue it. 286 Worklist.push_back(ChildPad); 287 continue; 288 } 289 // We've already checked this child, but might have found that 290 // it offers no proof either way. 291 Value *ChildUnwindDestToken = Memo->second; 292 if (!ChildUnwindDestToken) 293 continue; 294 // We already know the child's unwind dest, which can either 295 // be ConstantTokenNone to indicate unwind to caller, or can 296 // be another child of the catchpad. Only the former indicates 297 // the unwind dest of the catchswitch. 298 if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { 299 UnwindDestToken = ChildUnwindDestToken; 300 break; 301 } 302 assert(getParentPad(ChildUnwindDestToken) == CatchPad); 303 } 304 } 305 } 306 } else { 307 auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); 308 for (User *U : CleanupPad->users()) { 309 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { 310 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) 311 UnwindDestToken = RetUnwindDest->getFirstNonPHI(); 312 else 313 UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); 314 break; 315 } 316 Value *ChildUnwindDestToken; 317 if (auto *Invoke = dyn_cast<InvokeInst>(U)) { 318 ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); 319 } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { 320 Instruction *ChildPad = cast<Instruction>(U); 321 auto Memo = MemoMap.find(ChildPad); 322 if (Memo == MemoMap.end()) { 323 // Haven't resolved this child yet; queue it and keep searching. 324 Worklist.push_back(ChildPad); 325 continue; 326 } 327 // We've checked this child, but still need to ignore it if it 328 // had no proof either way. 329 ChildUnwindDestToken = Memo->second; 330 if (!ChildUnwindDestToken) 331 continue; 332 } else { 333 // Not a relevant user of the cleanuppad 334 continue; 335 } 336 // In a well-formed program, the child/invoke must either unwind to 337 // an(other) child of the cleanup, or exit the cleanup. In the 338 // first case, continue searching. 339 if (isa<Instruction>(ChildUnwindDestToken) && 340 getParentPad(ChildUnwindDestToken) == CleanupPad) 341 continue; 342 UnwindDestToken = ChildUnwindDestToken; 343 break; 344 } 345 } 346 // If we haven't found an unwind dest for CurrentPad, we may have queued its 347 // children, so move on to the next in the worklist. 348 if (!UnwindDestToken) 349 continue; 350 351 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits 352 // any ancestors of CurrentPad up to but not including UnwindDestToken's 353 // parent pad. 
Record this in the memo map, and check to see if the 354 // original EHPad being queried is one of the ones exited. 355 Value *UnwindParent; 356 if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) 357 UnwindParent = getParentPad(UnwindPad); 358 else 359 UnwindParent = nullptr; 360 bool ExitedOriginalPad = false; 361 for (Instruction *ExitedPad = CurrentPad; 362 ExitedPad && ExitedPad != UnwindParent; 363 ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { 364 // Skip over catchpads since they just follow their catchswitches. 365 if (isa<CatchPadInst>(ExitedPad)) 366 continue; 367 MemoMap[ExitedPad] = UnwindDestToken; 368 ExitedOriginalPad |= (ExitedPad == EHPad); 369 } 370 371 if (ExitedOriginalPad) 372 return UnwindDestToken; 373 374 // Continue the search. 375 } 376 377 // No definitive information is contained within this funclet. 378 return nullptr; 379 } 380 381 /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, 382 /// return that pad instruction. If it unwinds to caller, return 383 /// ConstantTokenNone. If it does not have a definitive unwind destination, 384 /// return nullptr. 385 /// 386 /// This routine gets invoked for calls in funclets in inlinees when inlining 387 /// an invoke. Since many funclets don't have calls inside them, it's queried 388 /// on-demand rather than building a map of pads to unwind dests up front. 389 /// Determining a funclet's unwind dest may require recursively searching its 390 /// descendants, and also ancestors and cousins if the descendants don't provide 391 /// an answer. Since most funclets will have their unwind dest immediately 392 /// available as the unwind dest of a catchswitch or cleanupret, this routine 393 /// searches top-down from the given pad and then up. To avoid worst-case 394 /// quadratic run-time given that approach, it uses a memo map to avoid 395 /// re-processing funclet trees. The callers that rewrite the IR as they go 396 /// take advantage of this, for correctness, by checking/forcing rewritten 397 /// pads' entries to match the original callee view. 398 static Value *getUnwindDestToken(Instruction *EHPad, 399 UnwindDestMemoTy &MemoMap) { 400 // Catchpads unwind to the same place as their catchswitch; 401 // redirct any queries on catchpads so the code below can 402 // deal with just catchswitches and cleanuppads. 403 if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) 404 EHPad = CPI->getCatchSwitch(); 405 406 // Check if we've already determined the unwind dest for this pad. 407 auto Memo = MemoMap.find(EHPad); 408 if (Memo != MemoMap.end()) 409 return Memo->second; 410 411 // Search EHPad and, if necessary, its descendants. 412 Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); 413 assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); 414 if (UnwindDestToken) 415 return UnwindDestToken; 416 417 // No information is available for this EHPad from itself or any of its 418 // descendants. An unwind all the way out to a pad in the caller would 419 // need also to agree with the unwind dest of the parent funclet, so 420 // search up the chain to try to find a funclet with information. Put 421 // null entries in the memo map to avoid re-processing as we go up. 
422 MemoMap[EHPad] = nullptr; 423 #ifndef NDEBUG 424 SmallPtrSet<Instruction *, 4> TempMemos; 425 TempMemos.insert(EHPad); 426 #endif 427 Instruction *LastUselessPad = EHPad; 428 Value *AncestorToken; 429 for (AncestorToken = getParentPad(EHPad); 430 auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); 431 AncestorToken = getParentPad(AncestorToken)) { 432 // Skip over catchpads since they just follow their catchswitches. 433 if (isa<CatchPadInst>(AncestorPad)) 434 continue; 435 // If the MemoMap had an entry mapping AncestorPad to nullptr, since we 436 // haven't yet called getUnwindDestTokenHelper for AncestorPad in this 437 // call to getUnwindDestToken, that would mean that AncestorPad had no 438 // information in itself, its descendants, or its ancestors. If that 439 // were the case, then we should also have recorded the lack of information 440 // for the descendant that we're coming from. So assert that we don't 441 // find a null entry in the MemoMap for AncestorPad. 442 assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); 443 auto AncestorMemo = MemoMap.find(AncestorPad); 444 if (AncestorMemo == MemoMap.end()) { 445 UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); 446 } else { 447 UnwindDestToken = AncestorMemo->second; 448 } 449 if (UnwindDestToken) 450 break; 451 LastUselessPad = AncestorPad; 452 MemoMap[LastUselessPad] = nullptr; 453 #ifndef NDEBUG 454 TempMemos.insert(LastUselessPad); 455 #endif 456 } 457 458 // We know that getUnwindDestTokenHelper was called on LastUselessPad and 459 // returned nullptr (and likewise for EHPad and any of its ancestors up to 460 // LastUselessPad), so LastUselessPad has no information from below. Since 461 // getUnwindDestTokenHelper must investigate all downward paths through 462 // no-information nodes to prove that a node has no information like this, 463 // and since any time it finds information it records it in the MemoMap for 464 // not just the immediately-containing funclet but also any ancestors also 465 // exited, it must be the case that, walking downward from LastUselessPad, 466 // visiting just those nodes which have not been mapped to an unwind dest 467 // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since 468 // they are just used to keep getUnwindDestTokenHelper from repeating work), 469 // any node visited must have been exhaustively searched with no information 470 // for it found. 471 SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); 472 while (!Worklist.empty()) { 473 Instruction *UselessPad = Worklist.pop_back_val(); 474 auto Memo = MemoMap.find(UselessPad); 475 if (Memo != MemoMap.end() && Memo->second) { 476 // Here the name 'UselessPad' is a bit of a misnomer, because we've found 477 // that it is a funclet that does have information about unwinding to 478 // a particular destination; its parent was a useless pad. 479 // Since its parent has no information, the unwind edge must not escape 480 // the parent, and must target a sibling of this pad. This local unwind 481 // gives us no information about EHPad. Leave it and the subtree rooted 482 // at it alone. 483 assert(getParentPad(Memo->second) == getParentPad(UselessPad)); 484 continue; 485 } 486 // We know we don't have information for UselesPad. 
If it has an entry in 487 // the MemoMap (mapping it to nullptr), it must be one of the TempMemos 488 // added on this invocation of getUnwindDestToken; if a previous invocation 489 // recorded nullptr, it would have had to prove that the ancestors of 490 // UselessPad, which include LastUselessPad, had no information, and that 491 // in turn would have required proving that the descendants of 492 // LastUselesPad, which include EHPad, have no information about 493 // LastUselessPad, which would imply that EHPad was mapped to nullptr in 494 // the MemoMap on that invocation, which isn't the case if we got here. 495 assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); 496 // Assert as we enumerate users that 'UselessPad' doesn't have any unwind 497 // information that we'd be contradicting by making a map entry for it 498 // (which is something that getUnwindDestTokenHelper must have proved for 499 // us to get here). Just assert on is direct users here; the checks in 500 // this downward walk at its descendants will verify that they don't have 501 // any unwind edges that exit 'UselessPad' either (i.e. they either have no 502 // unwind edges or unwind to a sibling). 503 MemoMap[UselessPad] = UnwindDestToken; 504 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { 505 assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad"); 506 for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { 507 auto *CatchPad = HandlerBlock->getFirstNonPHI(); 508 for (User *U : CatchPad->users()) { 509 assert( 510 (!isa<InvokeInst>(U) || 511 (getParentPad( 512 cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == 513 CatchPad)) && 514 "Expected useless pad"); 515 if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) 516 Worklist.push_back(cast<Instruction>(U)); 517 } 518 } 519 } else { 520 assert(isa<CleanupPadInst>(UselessPad)); 521 for (User *U : UselessPad->users()) { 522 assert(!isa<CleanupReturnInst>(U) && "Expected useless pad"); 523 assert((!isa<InvokeInst>(U) || 524 (getParentPad( 525 cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == 526 UselessPad)) && 527 "Expected useless pad"); 528 if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) 529 Worklist.push_back(cast<Instruction>(U)); 530 } 531 } 532 } 533 534 return UnwindDestToken; 535 } 536 537 /// When we inline a basic block into an invoke, 538 /// we have to turn all of the calls that can throw into invokes. 539 /// This function analyze BB to see if there are any calls, and if so, 540 /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI 541 /// nodes in that block with the values specified in InvokeDestPHIValues. 542 static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( 543 BasicBlock *BB, BasicBlock *UnwindEdge, 544 UnwindDestMemoTy *FuncletUnwindMap = nullptr) { 545 for (Instruction &I : llvm::make_early_inc_range(*BB)) { 546 // We only need to check for function calls: inlined invoke 547 // instructions require no special handling. 548 CallInst *CI = dyn_cast<CallInst>(&I); 549 550 if (!CI || CI->doesNotThrow()) 551 continue; 552 553 // We do not need to (and in fact, cannot) convert possibly throwing calls 554 // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into 555 // invokes. The caller's "segment" of the deoptimization continuation 556 // attached to the newly inlined @llvm.experimental_deoptimize 557 // (resp. @llvm.experimental.guard) call should contain the exception 558 // handling logic, if any. 
559 if (auto *F = CI->getCalledFunction()) 560 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || 561 F->getIntrinsicID() == Intrinsic::experimental_guard) 562 continue; 563 564 if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { 565 // This call is nested inside a funclet. If that funclet has an unwind 566 // destination within the inlinee, then unwinding out of this call would 567 // be UB. Rewriting this call to an invoke which targets the inlined 568 // invoke's unwind dest would give the call's parent funclet multiple 569 // unwind destinations, which is something that subsequent EH table 570 // generation can't handle and that the veirifer rejects. So when we 571 // see such a call, leave it as a call. 572 auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); 573 Value *UnwindDestToken = 574 getUnwindDestToken(FuncletPad, *FuncletUnwindMap); 575 if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) 576 continue; 577 #ifndef NDEBUG 578 Instruction *MemoKey; 579 if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) 580 MemoKey = CatchPad->getCatchSwitch(); 581 else 582 MemoKey = FuncletPad; 583 assert(FuncletUnwindMap->count(MemoKey) && 584 (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && 585 "must get memoized to avoid confusing later searches"); 586 #endif // NDEBUG 587 } 588 589 changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); 590 return BB; 591 } 592 return nullptr; 593 } 594 595 /// If we inlined an invoke site, we need to convert calls 596 /// in the body of the inlined function into invokes. 597 /// 598 /// II is the invoke instruction being inlined. FirstNewBlock is the first 599 /// block of the inlined code (the last block is the end of the function), 600 /// and InlineCodeInfo is information about the code that got inlined. 601 static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, 602 ClonedCodeInfo &InlinedCodeInfo) { 603 BasicBlock *InvokeDest = II->getUnwindDest(); 604 605 Function *Caller = FirstNewBlock->getParent(); 606 607 // The inlined code is currently at the end of the function, scan from the 608 // start of the inlined code to its end, checking for stuff we need to 609 // rewrite. 610 LandingPadInliningInfo Invoke(II); 611 612 // Get all of the inlined landing pad instructions. 613 SmallPtrSet<LandingPadInst*, 16> InlinedLPads; 614 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); 615 I != E; ++I) 616 if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) 617 InlinedLPads.insert(II->getLandingPadInst()); 618 619 // Append the clauses from the outer landing pad instruction into the inlined 620 // landing pad instructions. 621 LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); 622 for (LandingPadInst *InlinedLPad : InlinedLPads) { 623 unsigned OuterNum = OuterLPad->getNumClauses(); 624 InlinedLPad->reserveClauses(OuterNum); 625 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) 626 InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); 627 if (OuterLPad->isCleanup()) 628 InlinedLPad->setCleanup(true); 629 } 630 631 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); 632 BB != E; ++BB) { 633 if (InlinedCodeInfo.ContainsCalls) 634 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( 635 &*BB, Invoke.getOuterResumeDest())) 636 // Update any PHI nodes in the exceptional block to indicate that there 637 // is now a new entry in them. 
638 Invoke.addIncomingPHIValuesFor(NewBB); 639 640 // Forward any resumes that are remaining here. 641 if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) 642 Invoke.forwardResume(RI, InlinedLPads); 643 } 644 645 // Now that everything is happy, we have one final detail. The PHI nodes in 646 // the exception destination block still have entries due to the original 647 // invoke instruction. Eliminate these entries (which might even delete the 648 // PHI node) now. 649 InvokeDest->removePredecessor(II->getParent()); 650 } 651 652 /// If we inlined an invoke site, we need to convert calls 653 /// in the body of the inlined function into invokes. 654 /// 655 /// II is the invoke instruction being inlined. FirstNewBlock is the first 656 /// block of the inlined code (the last block is the end of the function), 657 /// and InlineCodeInfo is information about the code that got inlined. 658 static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, 659 ClonedCodeInfo &InlinedCodeInfo) { 660 BasicBlock *UnwindDest = II->getUnwindDest(); 661 Function *Caller = FirstNewBlock->getParent(); 662 663 assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); 664 665 // If there are PHI nodes in the unwind destination block, we need to keep 666 // track of which values came into them from the invoke before removing the 667 // edge from this block. 668 SmallVector<Value *, 8> UnwindDestPHIValues; 669 BasicBlock *InvokeBB = II->getParent(); 670 for (PHINode &PHI : UnwindDest->phis()) { 671 // Save the value to use for this edge. 672 UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB)); 673 } 674 675 // Add incoming-PHI values to the unwind destination block for the given basic 676 // block, using the values for the original invoke's source block. 677 auto UpdatePHINodes = [&](BasicBlock *Src) { 678 BasicBlock::iterator I = UnwindDest->begin(); 679 for (Value *V : UnwindDestPHIValues) { 680 PHINode *PHI = cast<PHINode>(I); 681 PHI->addIncoming(V, Src); 682 ++I; 683 } 684 }; 685 686 // This connects all the instructions which 'unwind to caller' to the invoke 687 // destination. 688 UnwindDestMemoTy FuncletUnwindMap; 689 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); 690 BB != E; ++BB) { 691 if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { 692 if (CRI->unwindsToCaller()) { 693 auto *CleanupPad = CRI->getCleanupPad(); 694 CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI); 695 CRI->eraseFromParent(); 696 UpdatePHINodes(&*BB); 697 // Finding a cleanupret with an unwind destination would confuse 698 // subsequent calls to getUnwindDestToken, so map the cleanuppad 699 // to short-circuit any such calls and recognize this as an "unwind 700 // to caller" cleanup. 701 assert(!FuncletUnwindMap.count(CleanupPad) || 702 isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); 703 FuncletUnwindMap[CleanupPad] = 704 ConstantTokenNone::get(Caller->getContext()); 705 } 706 } 707 708 Instruction *I = BB->getFirstNonPHI(); 709 if (!I->isEHPad()) 710 continue; 711 712 Instruction *Replacement = nullptr; 713 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { 714 if (CatchSwitch->unwindsToCaller()) { 715 Value *UnwindDestToken; 716 if (auto *ParentPad = 717 dyn_cast<Instruction>(CatchSwitch->getParentPad())) { 718 // This catchswitch is nested inside another funclet. If that 719 // funclet has an unwind destination within the inlinee, then 720 // unwinding out of this catchswitch would be UB. 
Rewriting this 721 // catchswitch to unwind to the inlined invoke's unwind dest would 722 // give the parent funclet multiple unwind destinations, which is 723 // something that subsequent EH table generation can't handle and 724 // that the veirifer rejects. So when we see such a call, leave it 725 // as "unwind to caller". 726 UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); 727 if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) 728 continue; 729 } else { 730 // This catchswitch has no parent to inherit constraints from, and 731 // none of its descendants can have an unwind edge that exits it and 732 // targets another funclet in the inlinee. It may or may not have a 733 // descendant that definitively has an unwind to caller. In either 734 // case, we'll have to assume that any unwinds out of it may need to 735 // be routed to the caller, so treat it as though it has a definitive 736 // unwind to caller. 737 UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); 738 } 739 auto *NewCatchSwitch = CatchSwitchInst::Create( 740 CatchSwitch->getParentPad(), UnwindDest, 741 CatchSwitch->getNumHandlers(), CatchSwitch->getName(), 742 CatchSwitch); 743 for (BasicBlock *PadBB : CatchSwitch->handlers()) 744 NewCatchSwitch->addHandler(PadBB); 745 // Propagate info for the old catchswitch over to the new one in 746 // the unwind map. This also serves to short-circuit any subsequent 747 // checks for the unwind dest of this catchswitch, which would get 748 // confused if they found the outer handler in the callee. 749 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; 750 Replacement = NewCatchSwitch; 751 } 752 } else if (!isa<FuncletPadInst>(I)) { 753 llvm_unreachable("unexpected EHPad!"); 754 } 755 756 if (Replacement) { 757 Replacement->takeName(I); 758 I->replaceAllUsesWith(Replacement); 759 I->eraseFromParent(); 760 UpdatePHINodes(&*BB); 761 } 762 } 763 764 if (InlinedCodeInfo.ContainsCalls) 765 for (Function::iterator BB = FirstNewBlock->getIterator(), 766 E = Caller->end(); 767 BB != E; ++BB) 768 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( 769 &*BB, UnwindDest, &FuncletUnwindMap)) 770 // Update any PHI nodes in the exceptional block to indicate that there 771 // is now a new entry in them. 772 UpdatePHINodes(NewBB); 773 774 // Now that everything is happy, we have one final detail. The PHI nodes in 775 // the exception destination block still have entries due to the original 776 // invoke instruction. Eliminate these entries (which might even delete the 777 // PHI node) now. 778 UnwindDest->removePredecessor(InvokeBB); 779 } 780 781 static bool haveCommonPrefix(MDNode *MIBStackContext, 782 MDNode *CallsiteStackContext) { 783 assert(MIBStackContext->getNumOperands() > 0 && 784 CallsiteStackContext->getNumOperands() > 0); 785 // Because of the context trimming performed during matching, the callsite 786 // context could have more stack ids than the MIB. We match up to the end of 787 // the shortest stack context. 
788 for (auto MIBStackIter = MIBStackContext->op_begin(), 789 CallsiteStackIter = CallsiteStackContext->op_begin(); 790 MIBStackIter != MIBStackContext->op_end() && 791 CallsiteStackIter != CallsiteStackContext->op_end(); 792 MIBStackIter++, CallsiteStackIter++) { 793 auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter); 794 auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter); 795 assert(Val1 && Val2); 796 if (Val1->getZExtValue() != Val2->getZExtValue()) 797 return false; 798 } 799 return true; 800 } 801 802 static void removeMemProfMetadata(CallBase *Call) { 803 Call->setMetadata(LLVMContext::MD_memprof, nullptr); 804 } 805 806 static void removeCallsiteMetadata(CallBase *Call) { 807 Call->setMetadata(LLVMContext::MD_callsite, nullptr); 808 } 809 810 static void updateMemprofMetadata(CallBase *CI, 811 const std::vector<Metadata *> &MIBList) { 812 assert(!MIBList.empty()); 813 // Remove existing memprof, which will either be replaced or may not be needed 814 // if we are able to use a single allocation type function attribute. 815 removeMemProfMetadata(CI); 816 CallStackTrie CallStack; 817 for (Metadata *MIB : MIBList) 818 CallStack.addCallStack(cast<MDNode>(MIB)); 819 bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI); 820 assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof)); 821 if (!MemprofMDAttached) 822 // If we used a function attribute remove the callsite metadata as well. 823 removeCallsiteMetadata(CI); 824 } 825 826 // Update the metadata on the inlined copy ClonedCall of a call OrigCall in the 827 // inlined callee body, based on the callsite metadata InlinedCallsiteMD from 828 // the call that was inlined. 829 static void propagateMemProfHelper(const CallBase *OrigCall, 830 CallBase *ClonedCall, 831 MDNode *InlinedCallsiteMD) { 832 MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite); 833 MDNode *ClonedCallsiteMD = nullptr; 834 // Check if the call originally had callsite metadata, and update it for the 835 // new call in the inlined body. 836 if (OrigCallsiteMD) { 837 // The cloned call's context is now the concatenation of the original call's 838 // callsite metadata and the callsite metadata on the call where it was 839 // inlined. 840 ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD); 841 ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD); 842 } 843 844 // Update any memprof metadata on the cloned call. 845 MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof); 846 if (!OrigMemProfMD) 847 return; 848 // We currently expect that allocations with memprof metadata also have 849 // callsite metadata for the allocation's part of the context. 850 assert(OrigCallsiteMD); 851 852 // New call's MIB list. 853 std::vector<Metadata *> NewMIBList; 854 855 // For each MIB metadata, check if its call stack context starts with the 856 // new clone's callsite metadata. If so, that MIB goes onto the cloned call in 857 // the inlined body. If not, it stays on the out-of-line original call. 858 for (auto &MIBOp : OrigMemProfMD->operands()) { 859 MDNode *MIB = dyn_cast<MDNode>(MIBOp); 860 // Stack is first operand of MIB. 861 MDNode *StackMD = getMIBStackNode(MIB); 862 assert(StackMD); 863 // See if the new cloned callsite context matches this profiled context. 864 if (haveCommonPrefix(StackMD, ClonedCallsiteMD)) 865 // Add it to the cloned call's MIB list. 
866 NewMIBList.push_back(MIB); 867 } 868 if (NewMIBList.empty()) { 869 removeMemProfMetadata(ClonedCall); 870 removeCallsiteMetadata(ClonedCall); 871 return; 872 } 873 if (NewMIBList.size() < OrigMemProfMD->getNumOperands()) 874 updateMemprofMetadata(ClonedCall, NewMIBList); 875 } 876 877 // Update memprof related metadata (!memprof and !callsite) based on the 878 // inlining of Callee into the callsite at CB. The updates include merging the 879 // inlined callee's callsite metadata with that of the inlined call, 880 // and moving the subset of any memprof contexts to the inlined callee 881 // allocations if they match the new inlined call stack. 882 // FIXME: Replace memprof metadata with function attribute if all MIB end up 883 // having the same behavior. Do other context trimming/merging optimizations 884 // too. 885 static void 886 propagateMemProfMetadata(Function *Callee, CallBase &CB, 887 bool ContainsMemProfMetadata, 888 const ValueMap<const Value *, WeakTrackingVH> &VMap) { 889 MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite); 890 // Only need to update if the inlined callsite had callsite metadata, or if 891 // there was any memprof metadata inlined. 892 if (!CallsiteMD && !ContainsMemProfMetadata) 893 return; 894 895 // Propagate metadata onto the cloned calls in the inlined callee. 896 for (const auto &Entry : VMap) { 897 // See if this is a call that has been inlined and remapped, and not 898 // simplified away in the process. 899 auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first); 900 auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second); 901 if (!OrigCall || !ClonedCall) 902 continue; 903 // If the inlined callsite did not have any callsite metadata, then it isn't 904 // involved in any profiled call contexts, and we can remove any memprof 905 // metadata on the cloned call. 906 if (!CallsiteMD) { 907 removeMemProfMetadata(ClonedCall); 908 removeCallsiteMetadata(ClonedCall); 909 continue; 910 } 911 propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD); 912 } 913 } 914 915 /// When inlining a call site that has !llvm.mem.parallel_loop_access, 916 /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should 917 /// be propagated to all memory-accessing cloned instructions. 918 static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, 919 Function::iterator FEnd) { 920 MDNode *MemParallelLoopAccess = 921 CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); 922 MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); 923 MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope); 924 MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias); 925 if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) 926 return; 927 928 for (BasicBlock &BB : make_range(FStart, FEnd)) { 929 for (Instruction &I : BB) { 930 // This metadata is only relevant for instructions that access memory. 931 if (!I.mayReadOrWriteMemory()) 932 continue; 933 934 if (MemParallelLoopAccess) { 935 // TODO: This probably should not overwrite MemParalleLoopAccess. 
936 MemParallelLoopAccess = MDNode::concatenate( 937 I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), 938 MemParallelLoopAccess); 939 I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, 940 MemParallelLoopAccess); 941 } 942 943 if (AccessGroup) 944 I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( 945 I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); 946 947 if (AliasScope) 948 I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( 949 I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); 950 951 if (NoAlias) 952 I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( 953 I.getMetadata(LLVMContext::MD_noalias), NoAlias)); 954 } 955 } 956 } 957 958 /// Bundle operands of the inlined function must be added to inlined call sites. 959 static void PropagateOperandBundles(Function::iterator InlinedBB, 960 Instruction *CallSiteEHPad) { 961 for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) { 962 CallBase *I = dyn_cast<CallBase>(&II); 963 if (!I) 964 continue; 965 // Skip call sites which already have a "funclet" bundle. 966 if (I->getOperandBundle(LLVMContext::OB_funclet)) 967 continue; 968 // Skip call sites which are nounwind intrinsics (as long as they don't 969 // lower into regular function calls in the course of IR transformations). 970 auto *CalledFn = 971 dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts()); 972 if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() && 973 !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID())) 974 continue; 975 976 SmallVector<OperandBundleDef, 1> OpBundles; 977 I->getOperandBundlesAsDefs(OpBundles); 978 OpBundles.emplace_back("funclet", CallSiteEHPad); 979 980 Instruction *NewInst = CallBase::Create(I, OpBundles, I); 981 NewInst->takeName(I); 982 I->replaceAllUsesWith(NewInst); 983 I->eraseFromParent(); 984 } 985 } 986 987 namespace { 988 /// Utility for cloning !noalias and !alias.scope metadata. When a code region 989 /// using scoped alias metadata is inlined, the aliasing relationships may not 990 /// hold between the two version. It is necessary to create a deep clone of the 991 /// metadata, putting the two versions in separate scope domains. 992 class ScopedAliasMetadataDeepCloner { 993 using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; 994 SetVector<const MDNode *> MD; 995 MetadataMap MDMap; 996 void addRecursiveMetadataUses(); 997 998 public: 999 ScopedAliasMetadataDeepCloner(const Function *F); 1000 1001 /// Create a new clone of the scoped alias metadata, which will be used by 1002 /// subsequent remap() calls. 1003 void clone(); 1004 1005 /// Remap instructions in the given range from the original to the cloned 1006 /// metadata. 1007 void remap(Function::iterator FStart, Function::iterator FEnd); 1008 }; 1009 } // namespace 1010 1011 ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( 1012 const Function *F) { 1013 for (const BasicBlock &BB : *F) { 1014 for (const Instruction &I : BB) { 1015 if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) 1016 MD.insert(M); 1017 if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) 1018 MD.insert(M); 1019 1020 // We also need to clone the metadata in noalias intrinsics. 
1021 if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) 1022 MD.insert(Decl->getScopeList()); 1023 } 1024 } 1025 addRecursiveMetadataUses(); 1026 } 1027 1028 void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { 1029 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); 1030 while (!Queue.empty()) { 1031 const MDNode *M = cast<MDNode>(Queue.pop_back_val()); 1032 for (const Metadata *Op : M->operands()) 1033 if (const MDNode *OpMD = dyn_cast<MDNode>(Op)) 1034 if (MD.insert(OpMD)) 1035 Queue.push_back(OpMD); 1036 } 1037 } 1038 1039 void ScopedAliasMetadataDeepCloner::clone() { 1040 assert(MDMap.empty() && "clone() already called ?"); 1041 1042 SmallVector<TempMDTuple, 16> DummyNodes; 1043 for (const MDNode *I : MD) { 1044 DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt)); 1045 MDMap[I].reset(DummyNodes.back().get()); 1046 } 1047 1048 // Create new metadata nodes to replace the dummy nodes, replacing old 1049 // metadata references with either a dummy node or an already-created new 1050 // node. 1051 SmallVector<Metadata *, 4> NewOps; 1052 for (const MDNode *I : MD) { 1053 for (const Metadata *Op : I->operands()) { 1054 if (const MDNode *M = dyn_cast<MDNode>(Op)) 1055 NewOps.push_back(MDMap[M]); 1056 else 1057 NewOps.push_back(const_cast<Metadata *>(Op)); 1058 } 1059 1060 MDNode *NewM = MDNode::get(I->getContext(), NewOps); 1061 MDTuple *TempM = cast<MDTuple>(MDMap[I]); 1062 assert(TempM->isTemporary() && "Expected temporary node"); 1063 1064 TempM->replaceAllUsesWith(NewM); 1065 NewOps.clear(); 1066 } 1067 } 1068 1069 void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, 1070 Function::iterator FEnd) { 1071 if (MDMap.empty()) 1072 return; // Nothing to do. 1073 1074 for (BasicBlock &BB : make_range(FStart, FEnd)) { 1075 for (Instruction &I : BB) { 1076 // TODO: The null checks for the MDMap.lookup() results should no longer 1077 // be necessary. 1078 if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) 1079 if (MDNode *MNew = MDMap.lookup(M)) 1080 I.setMetadata(LLVMContext::MD_alias_scope, MNew); 1081 1082 if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) 1083 if (MDNode *MNew = MDMap.lookup(M)) 1084 I.setMetadata(LLVMContext::MD_noalias, MNew); 1085 1086 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) 1087 if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) 1088 Decl->setScopeList(MNew); 1089 } 1090 } 1091 } 1092 1093 /// If the inlined function has noalias arguments, 1094 /// then add new alias scopes for each noalias argument, tag the mapped noalias 1095 /// parameters with noalias metadata specifying the new scope, and tag all 1096 /// non-derived loads, stores and memory intrinsics with the new alias scopes. 1097 static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, 1098 const DataLayout &DL, AAResults *CalleeAAR, 1099 ClonedCodeInfo &InlinedFunctionInfo) { 1100 if (!EnableNoAliasConversion) 1101 return; 1102 1103 const Function *CalledFunc = CB.getCalledFunction(); 1104 SmallVector<const Argument *, 4> NoAliasArgs; 1105 1106 for (const Argument &Arg : CalledFunc->args()) 1107 if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) 1108 NoAliasArgs.push_back(&Arg); 1109 1110 if (NoAliasArgs.empty()) 1111 return; 1112 1113 // To do a good job, if a noalias variable is captured, we need to know if 1114 // the capture point dominates the particular use we're considering. 
1115 DominatorTree DT; 1116 DT.recalculate(const_cast<Function&>(*CalledFunc)); 1117 1118 // noalias indicates that pointer values based on the argument do not alias 1119 // pointer values which are not based on it. So we add a new "scope" for each 1120 // noalias function argument. Accesses using pointers based on that argument 1121 // become part of that alias scope, accesses using pointers not based on that 1122 // argument are tagged as noalias with that scope. 1123 1124 DenseMap<const Argument *, MDNode *> NewScopes; 1125 MDBuilder MDB(CalledFunc->getContext()); 1126 1127 // Create a new scope domain for this function. 1128 MDNode *NewDomain = 1129 MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); 1130 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { 1131 const Argument *A = NoAliasArgs[i]; 1132 1133 std::string Name = std::string(CalledFunc->getName()); 1134 if (A->hasName()) { 1135 Name += ": %"; 1136 Name += A->getName(); 1137 } else { 1138 Name += ": argument "; 1139 Name += utostr(i); 1140 } 1141 1142 // Note: We always create a new anonymous root here. This is true regardless 1143 // of the linkage of the callee because the aliasing "scope" is not just a 1144 // property of the callee, but also all control dependencies in the caller. 1145 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); 1146 NewScopes.insert(std::make_pair(A, NewScope)); 1147 1148 if (UseNoAliasIntrinsic) { 1149 // Introduce a llvm.experimental.noalias.scope.decl for the noalias 1150 // argument. 1151 MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); 1152 auto *NoAliasDecl = 1153 IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList); 1154 // Ignore the result for now. The result will be used when the 1155 // llvm.noalias intrinsic is introduced. 1156 (void)NoAliasDecl; 1157 } 1158 } 1159 1160 // Iterate over all new instructions in the map; for all memory-access 1161 // instructions, add the alias scope metadata. 1162 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); 1163 VMI != VMIE; ++VMI) { 1164 if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) { 1165 if (!VMI->second) 1166 continue; 1167 1168 Instruction *NI = dyn_cast<Instruction>(VMI->second); 1169 if (!NI || InlinedFunctionInfo.isSimplified(I, NI)) 1170 continue; 1171 1172 bool IsArgMemOnlyCall = false, IsFuncCall = false; 1173 SmallVector<const Value *, 2> PtrArgs; 1174 1175 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 1176 PtrArgs.push_back(LI->getPointerOperand()); 1177 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 1178 PtrArgs.push_back(SI->getPointerOperand()); 1179 else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) 1180 PtrArgs.push_back(VAAI->getPointerOperand()); 1181 else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) 1182 PtrArgs.push_back(CXI->getPointerOperand()); 1183 else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) 1184 PtrArgs.push_back(RMWI->getPointerOperand()); 1185 else if (const auto *Call = dyn_cast<CallBase>(I)) { 1186 // If we know that the call does not access memory, then we'll still 1187 // know that about the inlined clone of this call site, and we don't 1188 // need to add metadata. 1189 if (Call->doesNotAccessMemory()) 1190 continue; 1191 1192 IsFuncCall = true; 1193 if (CalleeAAR) { 1194 MemoryEffects ME = CalleeAAR->getMemoryEffects(Call); 1195 1196 // We'll retain this knowledge without additional metadata. 
1197 if (ME.onlyAccessesInaccessibleMem()) 1198 continue; 1199 1200 if (ME.onlyAccessesArgPointees()) 1201 IsArgMemOnlyCall = true; 1202 } 1203 1204 for (Value *Arg : Call->args()) { 1205 // Only care about pointer arguments. If a noalias argument is 1206 // accessed through a non-pointer argument, it must be captured 1207 // first (e.g. via ptrtoint), and we protect against captures below. 1208 if (!Arg->getType()->isPointerTy()) 1209 continue; 1210 1211 PtrArgs.push_back(Arg); 1212 } 1213 } 1214 1215 // If we found no pointers, then this instruction is not suitable for 1216 // pairing with an instruction to receive aliasing metadata. 1217 // However, if this is a call, this we might just alias with none of the 1218 // noalias arguments. 1219 if (PtrArgs.empty() && !IsFuncCall) 1220 continue; 1221 1222 // It is possible that there is only one underlying object, but you 1223 // need to go through several PHIs to see it, and thus could be 1224 // repeated in the Objects list. 1225 SmallPtrSet<const Value *, 4> ObjSet; 1226 SmallVector<Metadata *, 4> Scopes, NoAliases; 1227 1228 SmallSetVector<const Argument *, 4> NAPtrArgs; 1229 for (const Value *V : PtrArgs) { 1230 SmallVector<const Value *, 4> Objects; 1231 getUnderlyingObjects(V, Objects, /* LI = */ nullptr); 1232 1233 for (const Value *O : Objects) 1234 ObjSet.insert(O); 1235 } 1236 1237 // Figure out if we're derived from anything that is not a noalias 1238 // argument. 1239 bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, 1240 UsesUnknownObject = false; 1241 for (const Value *V : ObjSet) { 1242 // Is this value a constant that cannot be derived from any pointer 1243 // value (we need to exclude constant expressions, for example, that 1244 // are formed from arithmetic on global symbols). 1245 bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) || 1246 isa<ConstantPointerNull>(V) || 1247 isa<ConstantDataVector>(V) || isa<UndefValue>(V); 1248 if (IsNonPtrConst) 1249 continue; 1250 1251 // If this is anything other than a noalias argument, then we cannot 1252 // completely describe the aliasing properties using alias.scope 1253 // metadata (and, thus, won't add any). 1254 if (const Argument *A = dyn_cast<Argument>(V)) { 1255 if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) 1256 UsesAliasingPtr = true; 1257 } else { 1258 UsesAliasingPtr = true; 1259 } 1260 1261 if (isEscapeSource(V)) { 1262 // An escape source can only alias with a noalias argument if it has 1263 // been captured beforehand. 1264 RequiresNoCaptureBefore = true; 1265 } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) { 1266 // If this is neither an escape source, nor some identified object 1267 // (which cannot directly alias a noalias argument), nor some other 1268 // argument (which, by definition, also cannot alias a noalias 1269 // argument), conservatively do not make any assumptions. 1270 UsesUnknownObject = true; 1271 } 1272 } 1273 1274 // Nothing we can do if the used underlying object cannot be reliably 1275 // determined. 1276 if (UsesUnknownObject) 1277 continue; 1278 1279 // A function call can always get captured noalias pointers (via other 1280 // parameters, globals, etc.). 1281 if (IsFuncCall && !IsArgMemOnlyCall) 1282 RequiresNoCaptureBefore = true; 1283 1284 // First, we want to figure out all of the sets with which we definitely 1285 // don't alias. Iterate over all noalias set, and add those for which: 1286 // 1. The noalias argument is not in the set of objects from which we 1287 // definitely derive. 1288 // 2. 
The noalias argument has not yet been captured. 1289 // An arbitrary function that might load pointers could see captured 1290 // noalias arguments via other noalias arguments or globals, and so we 1291 // must always check for prior capture. 1292 for (const Argument *A : NoAliasArgs) { 1293 if (ObjSet.contains(A)) 1294 continue; // May be based on a noalias argument. 1295 1296 // It might be tempting to skip the PointerMayBeCapturedBefore check if 1297 // A->hasNoCaptureAttr() is true, but this is incorrect because 1298 // nocapture only guarantees that no copies outlive the function, not 1299 // that the value cannot be locally captured. 1300 if (!RequiresNoCaptureBefore || 1301 !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false, 1302 /* StoreCaptures */ false, I, &DT)) 1303 NoAliases.push_back(NewScopes[A]); 1304 } 1305 1306 if (!NoAliases.empty()) 1307 NI->setMetadata(LLVMContext::MD_noalias, 1308 MDNode::concatenate( 1309 NI->getMetadata(LLVMContext::MD_noalias), 1310 MDNode::get(CalledFunc->getContext(), NoAliases))); 1311 1312 // Next, we want to figure out all of the sets to which we might belong. 1313 // We might belong to a set if the noalias argument is in the set of 1314 // underlying objects. If there is some non-noalias argument in our list 1315 // of underlying objects, then we cannot add a scope because the fact 1316 // that some access does not alias with any set of our noalias arguments 1317 // cannot itself guarantee that it does not alias with this access 1318 // (because there is some pointer of unknown origin involved and the 1319 // other access might also depend on this pointer). We also cannot add 1320 // scopes to arbitrary functions unless we know they don't access any 1321 // non-parameter pointer-values. 1322 bool CanAddScopes = !UsesAliasingPtr; 1323 if (CanAddScopes && IsFuncCall) 1324 CanAddScopes = IsArgMemOnlyCall; 1325 1326 if (CanAddScopes) 1327 for (const Argument *A : NoAliasArgs) { 1328 if (ObjSet.count(A)) 1329 Scopes.push_back(NewScopes[A]); 1330 } 1331 1332 if (!Scopes.empty()) 1333 NI->setMetadata( 1334 LLVMContext::MD_alias_scope, 1335 MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), 1336 MDNode::get(CalledFunc->getContext(), Scopes))); 1337 } 1338 } 1339 } 1340 1341 static bool MayContainThrowingOrExitingCall(Instruction *Begin, 1342 Instruction *End) { 1343 1344 assert(Begin->getParent() == End->getParent() && 1345 "Expected to be in same basic block!"); 1346 return !llvm::isGuaranteedToTransferExecutionToSuccessor( 1347 Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1); 1348 } 1349 1350 static AttrBuilder IdentifyValidAttributes(CallBase &CB) { 1351 1352 AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs()); 1353 if (!AB.hasAttributes()) 1354 return AB; 1355 AttrBuilder Valid(CB.getContext()); 1356 // Only allow these white listed attributes to be propagated back to the 1357 // callee. This is because other attributes may only be valid on the call 1358 // itself, i.e. attributes such as signext and zeroext. 
1359 if (auto DerefBytes = AB.getDereferenceableBytes()) 1360 Valid.addDereferenceableAttr(DerefBytes); 1361 if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes()) 1362 Valid.addDereferenceableOrNullAttr(DerefOrNullBytes); 1363 if (AB.contains(Attribute::NoAlias)) 1364 Valid.addAttribute(Attribute::NoAlias); 1365 if (AB.contains(Attribute::NonNull)) 1366 Valid.addAttribute(Attribute::NonNull); 1367 return Valid; 1368 } 1369 1370 static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { 1371 if (!UpdateReturnAttributes) 1372 return; 1373 1374 AttrBuilder Valid = IdentifyValidAttributes(CB); 1375 if (!Valid.hasAttributes()) 1376 return; 1377 auto *CalledFunction = CB.getCalledFunction(); 1378 auto &Context = CalledFunction->getContext(); 1379 1380 for (auto &BB : *CalledFunction) { 1381 auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()); 1382 if (!RI || !isa<CallBase>(RI->getOperand(0))) 1383 continue; 1384 auto *RetVal = cast<CallBase>(RI->getOperand(0)); 1385 // Check that the cloned RetVal exists and is a call, otherwise we cannot 1386 // add the attributes on the cloned RetVal. Simplification during inlining 1387 // could have transformed the cloned instruction. 1388 auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal)); 1389 if (!NewRetVal) 1390 continue; 1391 // Backward propagation of attributes to the returned value may be incorrect 1392 // if it is control flow dependent. 1393 // Consider: 1394 // @callee { 1395 // %rv = call @foo() 1396 // %rv2 = call @bar() 1397 // if (%rv2 != null) 1398 // return %rv2 1399 // if (%rv == null) 1400 // exit() 1401 // return %rv 1402 // } 1403 // caller() { 1404 // %val = call nonnull @callee() 1405 // } 1406 // Here we cannot add the nonnull attribute on either foo or bar. So, we 1407 // limit the check to both RetVal and RI are in the same basic block and 1408 // there are no throwing/exiting instructions between these instructions. 1409 if (RI->getParent() != RetVal->getParent() || 1410 MayContainThrowingOrExitingCall(RetVal, RI)) 1411 continue; 1412 // Add to the existing attributes of NewRetVal, i.e. the cloned call 1413 // instruction. 1414 // NB! When we have the same attribute already existing on NewRetVal, but 1415 // with a differing value, the AttributeList's merge API honours the already 1416 // existing attribute value (i.e. attributes such as dereferenceable, 1417 // dereferenceable_or_null etc). See AttrBuilder::merge for more details. 1418 AttributeList AL = NewRetVal->getAttributes(); 1419 AttributeList NewAL = AL.addRetAttributes(Context, Valid); 1420 NewRetVal->setAttributes(NewAL); 1421 } 1422 } 1423 1424 /// If the inlined function has non-byval align arguments, then 1425 /// add @llvm.assume-based alignment assumptions to preserve this information. 1426 static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { 1427 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) 1428 return; 1429 1430 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); 1431 auto &DL = CB.getCaller()->getParent()->getDataLayout(); 1432 1433 // To avoid inserting redundant assumptions, we should check for assumptions 1434 // already in the caller. To do this, we might need a DT of the caller. 
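  // For reference, the assumption materialized further down is roughly of the
  // form (illustrative only; the exact encoding is produced by
  // IRBuilder::CreateAlignmentAssumption):
  //   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]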
1435   DominatorTree DT;
1436   bool DTCalculated = false;
1437
1438   Function *CalledFunc = CB.getCalledFunction();
1439   for (Argument &Arg : CalledFunc->args()) {
1440     if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1441         Arg.hasNUses(0))
1442       continue;
1443     MaybeAlign Alignment = Arg.getParamAlign();
1444     if (!Alignment)
1445       continue;
1446
1447     if (!DTCalculated) {
1448       DT.recalculate(*CB.getCaller());
1449       DTCalculated = true;
1450     }
1451     // If we can already prove the asserted alignment in the context of the
1452     // caller, then don't bother inserting the assumption.
1453     Value *ArgVal = CB.getArgOperand(Arg.getArgNo());
1454     if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment)
1455       continue;
1456
1457     CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1458         DL, ArgVal, Alignment->value());
1459     AC->registerAssumption(cast<AssumeInst>(NewAsmp));
1460   }
1461 }
1462
1463 /// Once we have cloned code over from a callee into the caller,
1464 /// update the specified callgraph to reflect the changes we made.
1465 /// Note that it's possible that not all code was copied over, so only
1466 /// some edges of the callgraph may remain.
1467 static void UpdateCallGraphAfterInlining(CallBase &CB,
1468                                          Function::iterator FirstNewBlock,
1469                                          ValueToValueMapTy &VMap,
1470                                          InlineFunctionInfo &IFI) {
1471   CallGraph &CG = *IFI.CG;
1472   const Function *Caller = CB.getCaller();
1473   const Function *Callee = CB.getCalledFunction();
1474   CallGraphNode *CalleeNode = CG[Callee];
1475   CallGraphNode *CallerNode = CG[Caller];
1476
1477   // Since we inlined some uninlined call sites in the callee into the caller,
1478   // add edges from the caller to all of the callees of the callee.
1479   CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
1480
1481   // Consider the case where CalleeNode == CallerNode.
1482   CallGraphNode::CalledFunctionsVector CallCache;
1483   if (CalleeNode == CallerNode) {
1484     CallCache.assign(I, E);
1485     I = CallCache.begin();
1486     E = CallCache.end();
1487   }
1488
1489   for (; I != E; ++I) {
1490     // Skip 'reference' call records.
1491     if (!I->first)
1492       continue;
1493
1494     const Value *OrigCall = *I->first;
1495
1496     ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
1497     // Only copy the edge if the call was inlined!
1498     if (VMI == VMap.end() || VMI->second == nullptr)
1499       continue;
1500
1501     // If the call was inlined, but then constant folded, there is no edge to
1502     // add. Check for this case.
1503     auto *NewCall = dyn_cast<CallBase>(VMI->second);
1504     if (!NewCall)
1505       continue;
1506
1507     // We do not treat intrinsic calls like real function calls because we
1508     // expect them to become inline code; do not add an edge for an intrinsic.
1509     if (NewCall->getCalledFunction() &&
1510         NewCall->getCalledFunction()->isIntrinsic())
1511       continue;
1512
1513     // Remember that this call site got inlined for the client of
1514     // InlineFunction.
1515     IFI.InlinedCalls.push_back(NewCall);
1516
1517     // It's possible that inlining the callsite will cause it to go from an
1518     // indirect to a direct call by resolving a function pointer. If this
1519     // happens, set the callee of the new call site to a more precise
1520     // destination. This can also happen if the call graph node of the caller
1521     // was just unnecessarily imprecise.
1522     if (!I->second->getFunction())
1523       if (Function *F = NewCall->getCalledFunction()) {
1524         // Indirect call site resolved to direct call.
1525 CallerNode->addCalledFunction(NewCall, CG[F]); 1526 1527 continue; 1528 } 1529 1530 CallerNode->addCalledFunction(NewCall, I->second); 1531 } 1532 1533 // Update the call graph by deleting the edge from Callee to Caller. We must 1534 // do this after the loop above in case Caller and Callee are the same. 1535 CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB)); 1536 } 1537 1538 static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, 1539 Module *M, BasicBlock *InsertBlock, 1540 InlineFunctionInfo &IFI) { 1541 IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); 1542 1543 Value *Size = 1544 Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType)); 1545 1546 // Always generate a memcpy of alignment 1 here because we don't know 1547 // the alignment of the src pointer. Other optimizations can infer 1548 // better alignment. 1549 Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src, 1550 /*SrcAlign*/ Align(1), Size); 1551 } 1552 1553 /// When inlining a call site that has a byval argument, 1554 /// we have to make the implicit memcpy explicit by adding it. 1555 static Value *HandleByValArgument(Type *ByValType, Value *Arg, 1556 Instruction *TheCall, 1557 const Function *CalledFunc, 1558 InlineFunctionInfo &IFI, 1559 MaybeAlign ByValAlignment) { 1560 assert(cast<PointerType>(Arg->getType()) 1561 ->isOpaqueOrPointeeTypeMatches(ByValType)); 1562 Function *Caller = TheCall->getFunction(); 1563 const DataLayout &DL = Caller->getParent()->getDataLayout(); 1564 1565 // If the called function is readonly, then it could not mutate the caller's 1566 // copy of the byval'd memory. In this case, it is safe to elide the copy and 1567 // temporary. 1568 if (CalledFunc->onlyReadsMemory()) { 1569 // If the byval argument has a specified alignment that is greater than the 1570 // passed in pointer, then we either have to round up the input pointer or 1571 // give up on this transformation. 1572 if (ByValAlignment.valueOrOne() == 1) 1573 return Arg; 1574 1575 AssumptionCache *AC = 1576 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; 1577 1578 // If the pointer is already known to be sufficiently aligned, or if we can 1579 // round it up to a larger alignment, then we don't need a temporary. 1580 if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >= 1581 *ByValAlignment) 1582 return Arg; 1583 1584 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad 1585 // for code quality, but rarely happens and is required for correctness. 1586 } 1587 1588 // Create the alloca. If we have DataLayout, use nice alignment. 1589 Align Alignment = DL.getPrefTypeAlign(ByValType); 1590 1591 // If the byval had an alignment specified, we *must* use at least that 1592 // alignment, as it is required by the byval argument (and uses of the 1593 // pointer inside the callee). 1594 if (ByValAlignment) 1595 Alignment = std::max(Alignment, *ByValAlignment); 1596 1597 Value *NewAlloca = 1598 new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment, 1599 Arg->getName(), &*Caller->begin()->begin()); 1600 IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); 1601 1602 // Uses of the argument in the function should use our new alloca 1603 // instead. 1604 return NewAlloca; 1605 } 1606 1607 // Check whether this Value is used by a lifetime intrinsic. 
1608 static bool isUsedByLifetimeMarker(Value *V) {
1609   for (User *U : V->users())
1610     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
1611       if (II->isLifetimeStartOrEnd())
1612         return true;
1613   return false;
1614 }
1615
1616 // Check whether the given alloca already has
1617 // lifetime.start or lifetime.end intrinsics.
1618 static bool hasLifetimeMarkers(AllocaInst *AI) {
1619   Type *Ty = AI->getType();
1620   Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
1621                                        Ty->getPointerAddressSpace());
1622   if (Ty == Int8PtrTy)
1623     return isUsedByLifetimeMarker(AI);
1624
1625   // Do a scan to find all the casts to i8*.
1626   for (User *U : AI->users()) {
1627     if (U->getType() != Int8PtrTy) continue;
1628     if (U->stripPointerCasts() != AI) continue;
1629     if (isUsedByLifetimeMarker(U))
1630       return true;
1631   }
1632   return false;
1633 }
1634
1635 /// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1636 /// block. Allocas used in inalloca calls and allocas of dynamic array size
1637 /// cannot be static.
1638 static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1639   return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
1640 }
1641
1642 /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1643 /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1644 static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1645                                LLVMContext &Ctx,
1646                                DenseMap<const MDNode *, MDNode *> &IANodes) {
1647   auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
1648   return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
1649                          OrigDL.getScope(), IA);
1650 }
1651
1652 /// Update inlined instructions' line numbers to
1653 /// encode the location where these instructions are inlined.
1654 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1655                              Instruction *TheCall, bool CalleeHasDebugInfo) {
1656   const DebugLoc &TheCallDL = TheCall->getDebugLoc();
1657   if (!TheCallDL)
1658     return;
1659
1660   auto &Ctx = Fn->getContext();
1661   DILocation *InlinedAtNode = TheCallDL;
1662
1663   // Create a unique call site, not to be confused with any other call from the
1664   // same location.
1665   InlinedAtNode = DILocation::getDistinct(
1666       Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
1667       InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
1668
1669   // Cache the inlined-at nodes as they're built so they are reused; without
1670   // this, every instruction's inlined-at chain would become distinct from each
1671   // other.
1672   DenseMap<const MDNode *, MDNode *> IANodes;
1673
1674   // Check if we are not generating inline line tables and want to use
1675   // the call site location instead.
1676   bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
1677
1678   for (; FI != Fn->end(); ++FI) {
1679     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
1680          BI != BE; ++BI) {
1681       // Loop metadata needs to be updated so that the start and end locs
1682       // reference inlined-at locations.
1683 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, 1684 &IANodes](Metadata *MD) -> Metadata * { 1685 if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) 1686 return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); 1687 return MD; 1688 }; 1689 updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc); 1690 1691 if (!NoInlineLineTables) 1692 if (DebugLoc DL = BI->getDebugLoc()) { 1693 DebugLoc IDL = 1694 inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); 1695 BI->setDebugLoc(IDL); 1696 continue; 1697 } 1698 1699 if (CalleeHasDebugInfo && !NoInlineLineTables) 1700 continue; 1701 1702 // If the inlined instruction has no line number, or if inline info 1703 // is not being generated, make it look as if it originates from the call 1704 // location. This is important for ((__always_inline, __nodebug__)) 1705 // functions which must use caller location for all instructions in their 1706 // function body. 1707 1708 // Don't update static allocas, as they may get moved later. 1709 if (auto *AI = dyn_cast<AllocaInst>(BI)) 1710 if (allocaWouldBeStaticInEntry(AI)) 1711 continue; 1712 1713 BI->setDebugLoc(TheCallDL); 1714 } 1715 1716 // Remove debug info intrinsics if we're not keeping inline info. 1717 if (NoInlineLineTables) { 1718 BasicBlock::iterator BI = FI->begin(); 1719 while (BI != FI->end()) { 1720 if (isa<DbgInfoIntrinsic>(BI)) { 1721 BI = BI->eraseFromParent(); 1722 continue; 1723 } 1724 ++BI; 1725 } 1726 } 1727 1728 } 1729 } 1730 1731 #undef DEBUG_TYPE 1732 #define DEBUG_TYPE "assignment-tracking" 1733 /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. 1734 static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, 1735 const CallBase &CB) { 1736 at::StorageToVarsMap EscapedLocals; 1737 SmallPtrSet<const Value *, 4> SeenBases; 1738 1739 LLVM_DEBUG( 1740 errs() << "# Finding caller local variables escaped by callee\n"); 1741 for (const Value *Arg : CB.args()) { 1742 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n"); 1743 if (!Arg->getType()->isPointerTy()) { 1744 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n"); 1745 continue; 1746 } 1747 1748 const Instruction *I = dyn_cast<Instruction>(Arg); 1749 if (!I) { 1750 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n"); 1751 continue; 1752 } 1753 1754 // Walk back to the base storage. 1755 assert(Arg->getType()->isPtrOrPtrVectorTy()); 1756 APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false); 1757 const AllocaInst *Base = dyn_cast<AllocaInst>( 1758 Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true)); 1759 if (!Base) { 1760 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n"); 1761 continue; 1762 } 1763 1764 assert(Base); 1765 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n"); 1766 // We only need to process each base address once - skip any duplicates. 1767 if (!SeenBases.insert(Base).second) 1768 continue; 1769 1770 // Find all local variables associated with the backing storage. 1771 for (auto *DAI : at::getAssignmentMarkers(Base)) { 1772 // Skip variables from inlined functions - they are not local variables. 
1773 if (DAI->getDebugLoc().getInlinedAt()) 1774 continue; 1775 LLVM_DEBUG(errs() << " > DEF : " << *DAI << "\n"); 1776 EscapedLocals[Base].insert(at::VarRecord(DAI)); 1777 } 1778 } 1779 return EscapedLocals; 1780 } 1781 1782 static void trackInlinedStores(Function::iterator Start, Function::iterator End, 1783 const CallBase &CB) { 1784 LLVM_DEBUG(errs() << "trackInlinedStores into " 1785 << Start->getParent()->getName() << " from " 1786 << CB.getCalledFunction()->getName() << "\n"); 1787 std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(CB.getModule()); 1788 at::trackAssignments(Start, End, collectEscapedLocals(*DL, CB), *DL); 1789 } 1790 1791 /// Update inlined instructions' DIAssignID metadata. We need to do this 1792 /// otherwise a function inlined more than once into the same function 1793 /// will cause DIAssignID to be shared by many instructions. 1794 static void fixupAssignments(Function::iterator Start, Function::iterator End) { 1795 // Map {Old, New} metadata. Not used directly - use GetNewID. 1796 DenseMap<DIAssignID *, DIAssignID *> Map; 1797 auto GetNewID = [&Map](Metadata *Old) { 1798 DIAssignID *OldID = cast<DIAssignID>(Old); 1799 if (DIAssignID *NewID = Map.lookup(OldID)) 1800 return NewID; 1801 DIAssignID *NewID = DIAssignID::getDistinct(OldID->getContext()); 1802 Map[OldID] = NewID; 1803 return NewID; 1804 }; 1805 // Loop over all the inlined instructions. If we find a DIAssignID 1806 // attachment or use, replace it with a new version. 1807 for (auto BBI = Start; BBI != End; ++BBI) { 1808 for (Instruction &I : *BBI) { 1809 if (auto *ID = I.getMetadata(LLVMContext::MD_DIAssignID)) 1810 I.setMetadata(LLVMContext::MD_DIAssignID, GetNewID(ID)); 1811 else if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I)) 1812 DAI->setAssignId(GetNewID(DAI->getAssignID())); 1813 } 1814 } 1815 } 1816 #undef DEBUG_TYPE 1817 #define DEBUG_TYPE "inline-function" 1818 1819 /// Update the block frequencies of the caller after a callee has been inlined. 1820 /// 1821 /// Each block cloned into the caller has its block frequency scaled by the 1822 /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of 1823 /// callee's entry block gets the same frequency as the callsite block and the 1824 /// relative frequencies of all cloned blocks remain the same after cloning. 1825 static void updateCallerBFI(BasicBlock *CallSiteBlock, 1826 const ValueToValueMapTy &VMap, 1827 BlockFrequencyInfo *CallerBFI, 1828 BlockFrequencyInfo *CalleeBFI, 1829 const BasicBlock &CalleeEntryBlock) { 1830 SmallPtrSet<BasicBlock *, 16> ClonedBBs; 1831 for (auto Entry : VMap) { 1832 if (!isa<BasicBlock>(Entry.first) || !Entry.second) 1833 continue; 1834 auto *OrigBB = cast<BasicBlock>(Entry.first); 1835 auto *ClonedBB = cast<BasicBlock>(Entry.second); 1836 uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); 1837 if (!ClonedBBs.insert(ClonedBB).second) { 1838 // Multiple blocks in the callee might get mapped to one cloned block in 1839 // the caller since we prune the callee as we clone it. When that happens, 1840 // we want to use the maximum among the original blocks' frequencies. 
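      // For instance (hypothetical numbers): if callee blocks with frequencies
      // 100 and 250 both map onto the same cloned block, the clone is given
      // 250 here, before the final scaling against the call site frequency.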
1841 uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); 1842 if (NewFreq > Freq) 1843 Freq = NewFreq; 1844 } 1845 CallerBFI->setBlockFreq(ClonedBB, Freq); 1846 } 1847 BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); 1848 CallerBFI->setBlockFreqAndScale( 1849 EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), 1850 ClonedBBs); 1851 } 1852 1853 /// Update the branch metadata for cloned call instructions. 1854 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, 1855 const ProfileCount &CalleeEntryCount, 1856 const CallBase &TheCall, ProfileSummaryInfo *PSI, 1857 BlockFrequencyInfo *CallerBFI) { 1858 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) 1859 return; 1860 auto CallSiteCount = 1861 PSI ? PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt; 1862 int64_t CallCount = 1863 std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); 1864 updateProfileCallee(Callee, -CallCount, &VMap); 1865 } 1866 1867 void llvm::updateProfileCallee( 1868 Function *Callee, int64_t EntryDelta, 1869 const ValueMap<const Value *, WeakTrackingVH> *VMap) { 1870 auto CalleeCount = Callee->getEntryCount(); 1871 if (!CalleeCount) 1872 return; 1873 1874 const uint64_t PriorEntryCount = CalleeCount->getCount(); 1875 1876 // Since CallSiteCount is an estimate, it could exceed the original callee 1877 // count and has to be set to 0 so guard against underflow. 1878 const uint64_t NewEntryCount = 1879 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount) 1880 ? 0 1881 : PriorEntryCount + EntryDelta; 1882 1883 // During inlining ? 1884 if (VMap) { 1885 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; 1886 for (auto Entry : *VMap) 1887 if (isa<CallInst>(Entry.first)) 1888 if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) 1889 CI->updateProfWeight(CloneEntryCount, PriorEntryCount); 1890 } 1891 1892 if (EntryDelta) { 1893 Callee->setEntryCount(NewEntryCount); 1894 1895 for (BasicBlock &BB : *Callee) 1896 // No need to update the callsite if it is pruned during inlining. 1897 if (!VMap || VMap->count(&BB)) 1898 for (Instruction &I : BB) 1899 if (CallInst *CI = dyn_cast<CallInst>(&I)) 1900 CI->updateProfWeight(NewEntryCount, PriorEntryCount); 1901 } 1902 } 1903 1904 /// An operand bundle "clang.arc.attachedcall" on a call indicates the call 1905 /// result is implicitly consumed by a call to retainRV or claimRV immediately 1906 /// after the call. This function inlines the retainRV/claimRV calls. 1907 /// 1908 /// There are three cases to consider: 1909 /// 1910 /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned 1911 /// object in the callee return block, the autoreleaseRV call and the 1912 /// retainRV/claimRV call in the caller cancel out. If the call in the caller 1913 /// is a claimRV call, a call to objc_release is emitted. 1914 /// 1915 /// 2. If there is a call in the callee return block that doesn't have operand 1916 /// bundle "clang.arc.attachedcall", the operand bundle on the original call 1917 /// is transferred to the call in the callee. 1918 /// 1919 /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is 1920 /// a retainRV call. 
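/// An illustrative sketch of case 1 (not taken from a real test case): if the
/// callee's return block ends in
///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v)   ; result unused
///   ret ptr %v
/// and the call being inlined carries a retainRV attachment, the
/// autoreleaseRV call is simply erased; with a claimRV attachment, a call to
/// @llvm.objc.release(ptr %v) is emitted in its place.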
1921 static void
1922 inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
1923                            const SmallVectorImpl<ReturnInst *> &Returns) {
1924   Module *Mod = CB.getModule();
1925   assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
1926   bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
1927        IsUnsafeClaimRV = !IsRetainRV;
1928
1929   for (auto *RI : Returns) {
1930     Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
1931     bool InsertRetainCall = IsRetainRV;
1932     IRBuilder<> Builder(RI->getContext());
1933
1934     // Walk backwards through the basic block looking for either a matching
1935     // autoreleaseRV call or an unannotated call.
1936     auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),
1937                                       RI->getParent()->rend());
1938     for (Instruction &I : llvm::make_early_inc_range(InstRange)) {
1939       // Ignore casts.
1940       if (isa<CastInst>(I))
1941         continue;
1942
1943       if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1944         if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
1945             !II->hasNUses(0) ||
1946             objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)
1947           break;
1948
1949         // If we've found a matching autoreleaseRV call:
1950         // - If claimRV is attached to the call, insert a call to objc_release
1951         //   and erase the autoreleaseRV call.
1952         // - If retainRV is attached to the call, just erase the autoreleaseRV
1953         //   call.
1954         if (IsUnsafeClaimRV) {
1955           Builder.SetInsertPoint(II);
1956           Function *IFn =
1957               Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
1958           Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
1959           Builder.CreateCall(IFn, BC, "");
1960         }
1961         II->eraseFromParent();
1962         InsertRetainCall = false;
1963         break;
1964       }
1965
1966       auto *CI = dyn_cast<CallInst>(&I);
1967
1968       if (!CI)
1969         break;
1970
1971       if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||
1972           objcarc::hasAttachedCallOpBundle(CI))
1973         break;
1974
1975       // If we've found an unannotated call that defines RetOpnd, add a
1976       // "clang.arc.attachedcall" operand bundle.
1977       Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};
1978       OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
1979       auto *NewCall = CallBase::addOperandBundle(
1980           CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
1981       NewCall->copyMetadata(*CI);
1982       CI->replaceAllUsesWith(NewCall);
1983       CI->eraseFromParent();
1984       InsertRetainCall = false;
1985       break;
1986     }
1987
1988     if (InsertRetainCall) {
1989       // The retainRV is attached to the call and we've failed to find a
1990       // matching autoreleaseRV or an annotated call in the callee. Emit a call
1991       // to objc_retain.
1992       Builder.SetInsertPoint(RI);
1993       Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
1994       Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
1995       Builder.CreateCall(IFn, BC, "");
1996     }
1997   }
1998 }
1999
2000 /// This function inlines the called function into the basic block of the
2001 /// caller. This returns false if it is not possible to inline this call.
2002 /// The program is still in a well-defined state if this occurs though.
2003 ///
2004 /// Note that this only does one level of inlining. For example, if the
2005 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2006 /// exists in the instruction stream. Similarly this will inline a recursive
2007 /// function by one level.
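/// As a purely illustrative sketch, inlining the call in
///   define i32 @caller() {
///     %r = call i32 @callee(i32 1)
///     ret i32 %r
///   }
/// splices a pruned clone of @callee's body in place of the call, rewrites the
/// clone's returns to feed %r (through a PHI node when several returns remain),
/// and finally erases the original call instruction.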
2008 llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, 2009 bool MergeAttributes, 2010 AAResults *CalleeAAR, 2011 bool InsertLifetime, 2012 Function *ForwardVarArgsTo) { 2013 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); 2014 2015 // FIXME: we don't inline callbr yet. 2016 if (isa<CallBrInst>(CB)) 2017 return InlineResult::failure("We don't inline callbr yet."); 2018 2019 // If IFI has any state in it, zap it before we fill it in. 2020 IFI.reset(); 2021 2022 Function *CalledFunc = CB.getCalledFunction(); 2023 if (!CalledFunc || // Can't inline external function or indirect 2024 CalledFunc->isDeclaration()) // call! 2025 return InlineResult::failure("external or indirect"); 2026 2027 // The inliner does not know how to inline through calls with operand bundles 2028 // in general ... 2029 if (CB.hasOperandBundles()) { 2030 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { 2031 uint32_t Tag = CB.getOperandBundleAt(i).getTagID(); 2032 // ... but it knows how to inline through "deopt" operand bundles ... 2033 if (Tag == LLVMContext::OB_deopt) 2034 continue; 2035 // ... and "funclet" operand bundles. 2036 if (Tag == LLVMContext::OB_funclet) 2037 continue; 2038 if (Tag == LLVMContext::OB_clang_arc_attachedcall) 2039 continue; 2040 if (Tag == LLVMContext::OB_kcfi) 2041 continue; 2042 2043 return InlineResult::failure("unsupported operand bundle"); 2044 } 2045 } 2046 2047 // If the call to the callee cannot throw, set the 'nounwind' flag on any 2048 // calls that we inline. 2049 bool MarkNoUnwind = CB.doesNotThrow(); 2050 2051 BasicBlock *OrigBB = CB.getParent(); 2052 Function *Caller = OrigBB->getParent(); 2053 2054 // Do not inline strictfp function into non-strictfp one. It would require 2055 // conversion of all FP operations in host function to constrained intrinsics. 2056 if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) && 2057 !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) { 2058 return InlineResult::failure("incompatible strictfp attributes"); 2059 } 2060 2061 // GC poses two hazards to inlining, which only occur when the callee has GC: 2062 // 1. If the caller has no GC, then the callee's GC must be propagated to the 2063 // caller. 2064 // 2. If the caller has a differing GC, it is invalid to inline. 2065 if (CalledFunc->hasGC()) { 2066 if (!Caller->hasGC()) 2067 Caller->setGC(CalledFunc->getGC()); 2068 else if (CalledFunc->getGC() != Caller->getGC()) 2069 return InlineResult::failure("incompatible GC"); 2070 } 2071 2072 // Get the personality function from the callee if it contains a landing pad. 2073 Constant *CalledPersonality = 2074 CalledFunc->hasPersonalityFn() 2075 ? CalledFunc->getPersonalityFn()->stripPointerCasts() 2076 : nullptr; 2077 2078 // Find the personality function used by the landing pads of the caller. If it 2079 // exists, then check to see that it matches the personality function used in 2080 // the callee. 2081 Constant *CallerPersonality = 2082 Caller->hasPersonalityFn() 2083 ? Caller->getPersonalityFn()->stripPointerCasts() 2084 : nullptr; 2085 if (CalledPersonality) { 2086 if (!CallerPersonality) 2087 Caller->setPersonalityFn(CalledPersonality); 2088 // If the personality functions match, then we can perform the 2089 // inlining. Otherwise, we can't inline. 2090 // TODO: This isn't 100% true. Some personality functions are proper 2091 // supersets of others and can be used in place of the other. 
2092 else if (CalledPersonality != CallerPersonality) 2093 return InlineResult::failure("incompatible personality"); 2094 } 2095 2096 // We need to figure out which funclet the callsite was in so that we may 2097 // properly nest the callee. 2098 Instruction *CallSiteEHPad = nullptr; 2099 if (CallerPersonality) { 2100 EHPersonality Personality = classifyEHPersonality(CallerPersonality); 2101 if (isScopedEHPersonality(Personality)) { 2102 std::optional<OperandBundleUse> ParentFunclet = 2103 CB.getOperandBundle(LLVMContext::OB_funclet); 2104 if (ParentFunclet) 2105 CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); 2106 2107 // OK, the inlining site is legal. What about the target function? 2108 2109 if (CallSiteEHPad) { 2110 if (Personality == EHPersonality::MSVC_CXX) { 2111 // The MSVC personality cannot tolerate catches getting inlined into 2112 // cleanup funclets. 2113 if (isa<CleanupPadInst>(CallSiteEHPad)) { 2114 // Ok, the call site is within a cleanuppad. Let's check the callee 2115 // for catchpads. 2116 for (const BasicBlock &CalledBB : *CalledFunc) { 2117 if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) 2118 return InlineResult::failure("catch in cleanup funclet"); 2119 } 2120 } 2121 } else if (isAsynchronousEHPersonality(Personality)) { 2122 // SEH is even less tolerant, there may not be any sort of exceptional 2123 // funclet in the callee. 2124 for (const BasicBlock &CalledBB : *CalledFunc) { 2125 if (CalledBB.isEHPad()) 2126 return InlineResult::failure("SEH in cleanup funclet"); 2127 } 2128 } 2129 } 2130 } 2131 } 2132 2133 // Determine if we are dealing with a call in an EHPad which does not unwind 2134 // to caller. 2135 bool EHPadForCallUnwindsLocally = false; 2136 if (CallSiteEHPad && isa<CallInst>(CB)) { 2137 UnwindDestMemoTy FuncletUnwindMap; 2138 Value *CallSiteUnwindDestToken = 2139 getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); 2140 2141 EHPadForCallUnwindsLocally = 2142 CallSiteUnwindDestToken && 2143 !isa<ConstantTokenNone>(CallSiteUnwindDestToken); 2144 } 2145 2146 // Get an iterator to the last basic block in the function, which will have 2147 // the new function inlined after it. 2148 Function::iterator LastBlock = --Caller->end(); 2149 2150 // Make sure to capture all of the return instructions from the cloned 2151 // function. 2152 SmallVector<ReturnInst*, 8> Returns; 2153 ClonedCodeInfo InlinedFunctionInfo; 2154 Function::iterator FirstNewBlock; 2155 2156 { // Scope to destroy VMap after cloning. 2157 ValueToValueMapTy VMap; 2158 struct ByValInit { 2159 Value *Dst; 2160 Value *Src; 2161 Type *Ty; 2162 }; 2163 // Keep a list of pair (dst, src) to emit byval initializations. 2164 SmallVector<ByValInit, 4> ByValInits; 2165 2166 // When inlining a function that contains noalias scope metadata, 2167 // this metadata needs to be cloned so that the inlined blocks 2168 // have different "unique scopes" at every call site. 2169 // Track the metadata that must be cloned. Do this before other changes to 2170 // the function, so that we do not get in trouble when inlining caller == 2171 // callee. 2172 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); 2173 2174 auto &DL = Caller->getParent()->getDataLayout(); 2175 2176 // Calculate the vector of arguments to pass into the function cloner, which 2177 // matches up the formal to the actual argument values. 
2178     auto AI = CB.arg_begin();
2179     unsigned ArgNo = 0;
2180     for (Function::arg_iterator I = CalledFunc->arg_begin(),
2181          E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2182       Value *ActualArg = *AI;
2183
2184       // When byval arguments are actually inlined, we need to make the copy implied
2185       // by them explicit. However, we don't do this if the callee is readonly
2186       // or readnone, because the copy would be unneeded: the callee doesn't
2187       // modify the struct.
2188       if (CB.isByValArgument(ArgNo)) {
2189         ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
2190                                         &CB, CalledFunc, IFI,
2191                                         CalledFunc->getParamAlign(ArgNo));
2192         if (ActualArg != *AI)
2193           ByValInits.push_back(
2194               {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
2195       }
2196
2197       VMap[&*I] = ActualArg;
2198     }
2199
2200     // TODO: Remove this when users have been updated to the assume bundles.
2201     // Add alignment assumptions if necessary. We do this before the inlined
2202     // instructions are actually cloned into the caller so that we can easily
2203     // check what will be known at the start of the inlined code.
2204     AddAlignmentAssumptions(CB, IFI);
2205
2206     AssumptionCache *AC =
2207         IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2208
2209     /// Preserve all attributes on the call and its parameters.
2210     salvageKnowledge(&CB, AC);
2211
2212     // We want the inliner to prune the code as it copies. We would LOVE to
2213     // have no dead or constant instructions leftover after inlining occurs
2214     // (which can happen, e.g., because an argument was constant), but we'll be
2215     // happy with whatever the cloner can do.
2216     CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
2217                               /*ModuleLevelChanges=*/false, Returns, ".i",
2218                               &InlinedFunctionInfo);
2219     // Remember the first block that is newly cloned over.
2220     FirstNewBlock = LastBlock; ++FirstNewBlock;
2221
2222     // Insert retainRV/claimRV runtime calls.
2223     objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
2224     if (RVCallKind != objcarc::ARCInstKind::None)
2225       inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2226
2227     // Update caller/callee profiles only when requested. For sample loader
2228     // inlining, the context-sensitive inlinee profile doesn't need to be
2229     // subtracted from callee profile, and the inlined clone also doesn't need
2230     // to be scaled based on call site count.
2231     if (IFI.UpdateProfile) {
2232       if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2233         // Update the BFI of blocks cloned into the caller.
2234         updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
2235                         CalledFunc->front());
2236
2237       if (auto Profile = CalledFunc->getEntryCount())
2238         updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
2239                           IFI.CallerBFI);
2240     }
2241
2242     // Inject byval arguments initialization.
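    // Illustrative sketch of one such initialization (names hypothetical); the
    // caller-side copy is emitted with alignment 1, and later passes may infer
    // a better alignment:
    //   %b = alloca %struct.T, align 8
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %b, ptr align 1 %src,
    //                                    i64 <store size of %struct.T>, i1 false)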
2243 for (ByValInit &Init : ByValInits) 2244 HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(), 2245 &*FirstNewBlock, IFI); 2246 2247 std::optional<OperandBundleUse> ParentDeopt = 2248 CB.getOperandBundle(LLVMContext::OB_deopt); 2249 if (ParentDeopt) { 2250 SmallVector<OperandBundleDef, 2> OpDefs; 2251 2252 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { 2253 CallBase *ICS = dyn_cast_or_null<CallBase>(VH); 2254 if (!ICS) 2255 continue; // instruction was DCE'd or RAUW'ed to undef 2256 2257 OpDefs.clear(); 2258 2259 OpDefs.reserve(ICS->getNumOperandBundles()); 2260 2261 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; 2262 ++COBi) { 2263 auto ChildOB = ICS->getOperandBundleAt(COBi); 2264 if (ChildOB.getTagID() != LLVMContext::OB_deopt) { 2265 // If the inlined call has other operand bundles, let them be 2266 OpDefs.emplace_back(ChildOB); 2267 continue; 2268 } 2269 2270 // It may be useful to separate this logic (of handling operand 2271 // bundles) out to a separate "policy" component if this gets crowded. 2272 // Prepend the parent's deoptimization continuation to the newly 2273 // inlined call's deoptimization continuation. 2274 std::vector<Value *> MergedDeoptArgs; 2275 MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + 2276 ChildOB.Inputs.size()); 2277 2278 llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs); 2279 llvm::append_range(MergedDeoptArgs, ChildOB.Inputs); 2280 2281 OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); 2282 } 2283 2284 Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS); 2285 2286 // Note: the RAUW does the appropriate fixup in VMap, so we need to do 2287 // this even if the call returns void. 2288 ICS->replaceAllUsesWith(NewI); 2289 2290 VH = nullptr; 2291 ICS->eraseFromParent(); 2292 } 2293 } 2294 2295 // Update the callgraph if requested. 2296 if (IFI.CG) 2297 UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI); 2298 2299 // For 'nodebug' functions, the associated DISubprogram is always null. 2300 // Conservatively avoid propagating the callsite debug location to 2301 // instructions inlined from a function whose DISubprogram is not null. 2302 fixupLineNumbers(Caller, FirstNewBlock, &CB, 2303 CalledFunc->getSubprogram() != nullptr); 2304 2305 if (isAssignmentTrackingEnabled(*Caller->getParent())) { 2306 // Interpret inlined stores to caller-local variables as assignments. 2307 trackInlinedStores(FirstNewBlock, Caller->end(), CB); 2308 2309 // Update DIAssignID metadata attachments and uses so that they are 2310 // unique to this inlined instance. 2311 fixupAssignments(FirstNewBlock, Caller->end()); 2312 } 2313 2314 // Now clone the inlined noalias scope metadata. 2315 SAMetadataCloner.clone(); 2316 SAMetadataCloner.remap(FirstNewBlock, Caller->end()); 2317 2318 // Add noalias metadata if necessary. 2319 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); 2320 2321 // Clone return attributes on the callsite into the calls within the inlined 2322 // function which feed into its return value. 2323 AddReturnAttributes(CB, VMap); 2324 2325 propagateMemProfMetadata(CalledFunc, CB, 2326 InlinedFunctionInfo.ContainsMemProfMetadata, VMap); 2327 2328 // Propagate metadata on the callsite if necessary. 2329 PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); 2330 2331 // Register any cloned assumptions. 
2332 if (IFI.GetAssumptionCache) 2333 for (BasicBlock &NewBlock : 2334 make_range(FirstNewBlock->getIterator(), Caller->end())) 2335 for (Instruction &I : NewBlock) 2336 if (auto *II = dyn_cast<CondGuardInst>(&I)) 2337 IFI.GetAssumptionCache(*Caller).registerAssumption(II); 2338 } 2339 2340 // If there are any alloca instructions in the block that used to be the entry 2341 // block for the callee, move them to the entry block of the caller. First 2342 // calculate which instruction they should be inserted before. We insert the 2343 // instructions at the end of the current alloca list. 2344 { 2345 BasicBlock::iterator InsertPoint = Caller->begin()->begin(); 2346 for (BasicBlock::iterator I = FirstNewBlock->begin(), 2347 E = FirstNewBlock->end(); I != E; ) { 2348 AllocaInst *AI = dyn_cast<AllocaInst>(I++); 2349 if (!AI) continue; 2350 2351 // If the alloca is now dead, remove it. This often occurs due to code 2352 // specialization. 2353 if (AI->use_empty()) { 2354 AI->eraseFromParent(); 2355 continue; 2356 } 2357 2358 if (!allocaWouldBeStaticInEntry(AI)) 2359 continue; 2360 2361 // Keep track of the static allocas that we inline into the caller. 2362 IFI.StaticAllocas.push_back(AI); 2363 2364 // Scan for the block of allocas that we can move over, and move them 2365 // all at once. 2366 while (isa<AllocaInst>(I) && 2367 !cast<AllocaInst>(I)->use_empty() && 2368 allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { 2369 IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); 2370 ++I; 2371 } 2372 2373 // Transfer all of the allocas over in a block. Using splice means 2374 // that the instructions aren't removed from the symbol table, then 2375 // reinserted. 2376 Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock, 2377 AI->getIterator(), I); 2378 } 2379 } 2380 2381 SmallVector<Value*,4> VarArgsToForward; 2382 SmallVector<AttributeSet, 4> VarArgsAttrs; 2383 for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); 2384 i < CB.arg_size(); i++) { 2385 VarArgsToForward.push_back(CB.getArgOperand(i)); 2386 VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i)); 2387 } 2388 2389 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; 2390 if (InlinedFunctionInfo.ContainsCalls) { 2391 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; 2392 if (CallInst *CI = dyn_cast<CallInst>(&CB)) 2393 CallSiteTailKind = CI->getTailCallKind(); 2394 2395 // For inlining purposes, the "notail" marker is the same as no marker. 2396 if (CallSiteTailKind == CallInst::TCK_NoTail) 2397 CallSiteTailKind = CallInst::TCK_None; 2398 2399 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; 2400 ++BB) { 2401 for (Instruction &I : llvm::make_early_inc_range(*BB)) { 2402 CallInst *CI = dyn_cast<CallInst>(&I); 2403 if (!CI) 2404 continue; 2405 2406 // Forward varargs from inlined call site to calls to the 2407 // ForwardVarArgsTo function, if requested, and to musttail calls. 2408 if (!VarArgsToForward.empty() && 2409 ((ForwardVarArgsTo && 2410 CI->getCalledFunction() == ForwardVarArgsTo) || 2411 CI->isMustTailCall())) { 2412 // Collect attributes for non-vararg parameters. 2413 AttributeList Attrs = CI->getAttributes(); 2414 SmallVector<AttributeSet, 8> ArgAttrs; 2415 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { 2416 for (unsigned ArgNo = 0; 2417 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) 2418 ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo)); 2419 } 2420 2421 // Add VarArg attributes. 
2422 ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); 2423 Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(), 2424 Attrs.getRetAttrs(), ArgAttrs); 2425 // Add VarArgs to existing parameters. 2426 SmallVector<Value *, 6> Params(CI->args()); 2427 Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); 2428 CallInst *NewCI = CallInst::Create( 2429 CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); 2430 NewCI->setDebugLoc(CI->getDebugLoc()); 2431 NewCI->setAttributes(Attrs); 2432 NewCI->setCallingConv(CI->getCallingConv()); 2433 CI->replaceAllUsesWith(NewCI); 2434 CI->eraseFromParent(); 2435 CI = NewCI; 2436 } 2437 2438 if (Function *F = CI->getCalledFunction()) 2439 InlinedDeoptimizeCalls |= 2440 F->getIntrinsicID() == Intrinsic::experimental_deoptimize; 2441 2442 // We need to reduce the strength of any inlined tail calls. For 2443 // musttail, we have to avoid introducing potential unbounded stack 2444 // growth. For example, if functions 'f' and 'g' are mutually recursive 2445 // with musttail, we can inline 'g' into 'f' so long as we preserve 2446 // musttail on the cloned call to 'f'. If either the inlined call site 2447 // or the cloned call site is *not* musttail, the program already has 2448 // one frame of stack growth, so it's safe to remove musttail. Here is 2449 // a table of example transformations: 2450 // 2451 // f -> musttail g -> musttail f ==> f -> musttail f 2452 // f -> musttail g -> tail f ==> f -> tail f 2453 // f -> g -> musttail f ==> f -> f 2454 // f -> g -> tail f ==> f -> f 2455 // 2456 // Inlined notail calls should remain notail calls. 2457 CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); 2458 if (ChildTCK != CallInst::TCK_NoTail) 2459 ChildTCK = std::min(CallSiteTailKind, ChildTCK); 2460 CI->setTailCallKind(ChildTCK); 2461 InlinedMustTailCalls |= CI->isMustTailCall(); 2462 2463 // Call sites inlined through a 'nounwind' call site should be 2464 // 'nounwind' as well. However, avoid marking call sites explicitly 2465 // where possible. This helps expose more opportunities for CSE after 2466 // inlining, commonly when the callee is an intrinsic. 2467 if (MarkNoUnwind && !CI->doesNotThrow()) 2468 CI->setDoesNotThrow(); 2469 } 2470 } 2471 } 2472 2473 // Leave lifetime markers for the static alloca's, scoping them to the 2474 // function we just inlined. 2475 // We need to insert lifetime intrinsics even at O0 to avoid invalid 2476 // access caused by multithreaded coroutines. The check 2477 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. 2478 if ((InsertLifetime || Caller->isPresplitCoroutine()) && 2479 !IFI.StaticAllocas.empty()) { 2480 IRBuilder<> builder(&FirstNewBlock->front()); 2481 for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { 2482 AllocaInst *AI = IFI.StaticAllocas[ai]; 2483 // Don't mark swifterror allocas. They can't have bitcast uses. 2484 if (AI->isSwiftError()) 2485 continue; 2486 2487 // If the alloca is already scoped to something smaller than the whole 2488 // function then there's no need to add redundant, less accurate markers. 2489 if (hasLifetimeMarkers(AI)) 2490 continue; 2491 2492 // Try to determine the size of the allocation. 
2493 ConstantInt *AllocaSize = nullptr; 2494 if (ConstantInt *AIArraySize = 2495 dyn_cast<ConstantInt>(AI->getArraySize())) { 2496 auto &DL = Caller->getParent()->getDataLayout(); 2497 Type *AllocaType = AI->getAllocatedType(); 2498 TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); 2499 uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); 2500 2501 // Don't add markers for zero-sized allocas. 2502 if (AllocaArraySize == 0) 2503 continue; 2504 2505 // Check that array size doesn't saturate uint64_t and doesn't 2506 // overflow when it's multiplied by type size. 2507 if (!AllocaTypeSize.isScalable() && 2508 AllocaArraySize != std::numeric_limits<uint64_t>::max() && 2509 std::numeric_limits<uint64_t>::max() / AllocaArraySize >= 2510 AllocaTypeSize.getFixedValue()) { 2511 AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), 2512 AllocaArraySize * AllocaTypeSize); 2513 } 2514 } 2515 2516 builder.CreateLifetimeStart(AI, AllocaSize); 2517 for (ReturnInst *RI : Returns) { 2518 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize 2519 // call and a return. The return kills all local allocas. 2520 if (InlinedMustTailCalls && 2521 RI->getParent()->getTerminatingMustTailCall()) 2522 continue; 2523 if (InlinedDeoptimizeCalls && 2524 RI->getParent()->getTerminatingDeoptimizeCall()) 2525 continue; 2526 IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); 2527 } 2528 } 2529 } 2530 2531 // If the inlined code contained dynamic alloca instructions, wrap the inlined 2532 // code with llvm.stacksave/llvm.stackrestore intrinsics. 2533 if (InlinedFunctionInfo.ContainsDynamicAllocas) { 2534 Module *M = Caller->getParent(); 2535 // Get the two intrinsics we care about. 2536 Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); 2537 Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); 2538 2539 // Insert the llvm.stacksave. 2540 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) 2541 .CreateCall(StackSave, {}, "savedstack"); 2542 2543 // Insert a call to llvm.stackrestore before any return instructions in the 2544 // inlined function. 2545 for (ReturnInst *RI : Returns) { 2546 // Don't insert llvm.stackrestore calls between a musttail or deoptimize 2547 // call and a return. The return will restore the stack pointer. 2548 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) 2549 continue; 2550 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) 2551 continue; 2552 IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); 2553 } 2554 } 2555 2556 // If we are inlining for an invoke instruction, we must make sure to rewrite 2557 // any call instructions into invoke instructions. This is sensitive to which 2558 // funclet pads were top-level in the inlinee, so must be done before 2559 // rewriting the "parent pad" links. 2560 if (auto *II = dyn_cast<InvokeInst>(&CB)) { 2561 BasicBlock *UnwindDest = II->getUnwindDest(); 2562 Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); 2563 if (isa<LandingPadInst>(FirstNonPHI)) { 2564 HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); 2565 } else { 2566 HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); 2567 } 2568 } 2569 2570 // Update the lexical scopes of the new funclets and callsites. 2571 // Anything that had 'none' as its parent is now nested inside the callsite's 2572 // EHPad. 
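  // Illustrative sketch: a pad cloned from the callee that was top-level there,
  // e.g.
  //   %cp = cleanuppad within none []
  // is re-parented below to
  //   %cp = cleanuppad within %callsite.pad []
  // where %callsite.pad (hypothetical name) is the funclet pad containing the
  // original call site.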
2573 if (CallSiteEHPad) { 2574 for (Function::iterator BB = FirstNewBlock->getIterator(), 2575 E = Caller->end(); 2576 BB != E; ++BB) { 2577 // Add bundle operands to inlined call sites. 2578 PropagateOperandBundles(BB, CallSiteEHPad); 2579 2580 // It is problematic if the inlinee has a cleanupret which unwinds to 2581 // caller and we inline it into a call site which doesn't unwind but into 2582 // an EH pad that does. Such an edge must be dynamically unreachable. 2583 // As such, we replace the cleanupret with unreachable. 2584 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) 2585 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) 2586 changeToUnreachable(CleanupRet); 2587 2588 Instruction *I = BB->getFirstNonPHI(); 2589 if (!I->isEHPad()) 2590 continue; 2591 2592 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { 2593 if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) 2594 CatchSwitch->setParentPad(CallSiteEHPad); 2595 } else { 2596 auto *FPI = cast<FuncletPadInst>(I); 2597 if (isa<ConstantTokenNone>(FPI->getParentPad())) 2598 FPI->setParentPad(CallSiteEHPad); 2599 } 2600 } 2601 } 2602 2603 if (InlinedDeoptimizeCalls) { 2604 // We need to at least remove the deoptimizing returns from the Return set, 2605 // so that the control flow from those returns does not get merged into the 2606 // caller (but terminate it instead). If the caller's return type does not 2607 // match the callee's return type, we also need to change the return type of 2608 // the intrinsic. 2609 if (Caller->getReturnType() == CB.getType()) { 2610 llvm::erase_if(Returns, [](ReturnInst *RI) { 2611 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; 2612 }); 2613 } else { 2614 SmallVector<ReturnInst *, 8> NormalReturns; 2615 Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( 2616 Caller->getParent(), Intrinsic::experimental_deoptimize, 2617 {Caller->getReturnType()}); 2618 2619 for (ReturnInst *RI : Returns) { 2620 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); 2621 if (!DeoptCall) { 2622 NormalReturns.push_back(RI); 2623 continue; 2624 } 2625 2626 // The calling convention on the deoptimize call itself may be bogus, 2627 // since the code we're inlining may have undefined behavior (and may 2628 // never actually execute at runtime); but all 2629 // @llvm.experimental.deoptimize declarations have to have the same 2630 // calling convention in a well-formed module. 2631 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); 2632 NewDeoptIntrinsic->setCallingConv(CallingConv); 2633 auto *CurBB = RI->getParent(); 2634 RI->eraseFromParent(); 2635 2636 SmallVector<Value *, 4> CallArgs(DeoptCall->args()); 2637 2638 SmallVector<OperandBundleDef, 1> OpBundles; 2639 DeoptCall->getOperandBundlesAsDefs(OpBundles); 2640 auto DeoptAttributes = DeoptCall->getAttributes(); 2641 DeoptCall->eraseFromParent(); 2642 assert(!OpBundles.empty() && 2643 "Expected at least the deopt operand bundle"); 2644 2645 IRBuilder<> Builder(CurBB); 2646 CallInst *NewDeoptCall = 2647 Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); 2648 NewDeoptCall->setCallingConv(CallingConv); 2649 NewDeoptCall->setAttributes(DeoptAttributes); 2650 if (NewDeoptCall->getType()->isVoidTy()) 2651 Builder.CreateRetVoid(); 2652 else 2653 Builder.CreateRet(NewDeoptCall); 2654 } 2655 2656 // Leave behind the normal returns so we can merge control flow. 2657 std::swap(Returns, NormalReturns); 2658 } 2659 } 2660 2661 // Handle any inlined musttail call sites. 
In order for a new call site to be 2662 // musttail, the source of the clone and the inlined call site must have been 2663 // musttail. Therefore it's safe to return without merging control into the 2664 // phi below. 2665 if (InlinedMustTailCalls) { 2666 // Check if we need to bitcast the result of any musttail calls. 2667 Type *NewRetTy = Caller->getReturnType(); 2668 bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; 2669 2670 // Handle the returns preceded by musttail calls separately. 2671 SmallVector<ReturnInst *, 8> NormalReturns; 2672 for (ReturnInst *RI : Returns) { 2673 CallInst *ReturnedMustTail = 2674 RI->getParent()->getTerminatingMustTailCall(); 2675 if (!ReturnedMustTail) { 2676 NormalReturns.push_back(RI); 2677 continue; 2678 } 2679 if (!NeedBitCast) 2680 continue; 2681 2682 // Delete the old return and any preceding bitcast. 2683 BasicBlock *CurBB = RI->getParent(); 2684 auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); 2685 RI->eraseFromParent(); 2686 if (OldCast) 2687 OldCast->eraseFromParent(); 2688 2689 // Insert a new bitcast and return with the right type. 2690 IRBuilder<> Builder(CurBB); 2691 Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); 2692 } 2693 2694 // Leave behind the normal returns so we can merge control flow. 2695 std::swap(Returns, NormalReturns); 2696 } 2697 2698 // Now that all of the transforms on the inlined code have taken place but 2699 // before we splice the inlined code into the CFG and lose track of which 2700 // blocks were actually inlined, collect the call sites. We only do this if 2701 // call graph updates weren't requested, as those provide value handle based 2702 // tracking of inlined call sites instead. Calls to intrinsics are not 2703 // collected because they are not inlineable. 2704 if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { 2705 // Otherwise just collect the raw call sites that were inlined. 2706 for (BasicBlock &NewBB : 2707 make_range(FirstNewBlock->getIterator(), Caller->end())) 2708 for (Instruction &I : NewBB) 2709 if (auto *CB = dyn_cast<CallBase>(&I)) 2710 if (!(CB->getCalledFunction() && 2711 CB->getCalledFunction()->isIntrinsic())) 2712 IFI.InlinedCallSites.push_back(CB); 2713 } 2714 2715 // If we cloned in _exactly one_ basic block, and if that block ends in a 2716 // return instruction, we splice the body of the inlined callee directly into 2717 // the calling basic block. 2718 if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { 2719 // Move all of the instructions right before the call. 2720 OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(), 2721 FirstNewBlock->end()); 2722 // Remove the cloned basic block. 2723 Caller->back().eraseFromParent(); 2724 2725 // If the call site was an invoke instruction, add a branch to the normal 2726 // destination. 2727 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 2728 BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), &CB); 2729 NewBr->setDebugLoc(Returns[0]->getDebugLoc()); 2730 } 2731 2732 // If the return instruction returned a value, replace uses of the call with 2733 // uses of the returned value. 2734 if (!CB.use_empty()) { 2735 ReturnInst *R = Returns[0]; 2736 if (&CB == R->getReturnValue()) 2737 CB.replaceAllUsesWith(UndefValue::get(CB.getType())); 2738 else 2739 CB.replaceAllUsesWith(R->getReturnValue()); 2740 } 2741 // Since we are now done with the Call/Invoke, we can delete it. 
2742 CB.eraseFromParent(); 2743 2744 // Since we are now done with the return instruction, delete it also. 2745 Returns[0]->eraseFromParent(); 2746 2747 if (MergeAttributes) 2748 AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); 2749 2750 // We are now done with the inlining. 2751 return InlineResult::success(); 2752 } 2753 2754 // Otherwise, we have the normal case, of more than one block to inline or 2755 // multiple return sites. 2756 2757 // We want to clone the entire callee function into the hole between the 2758 // "starter" and "ender" blocks. How we accomplish this depends on whether 2759 // this is an invoke instruction or a call instruction. 2760 BasicBlock *AfterCallBB; 2761 BranchInst *CreatedBranchToNormalDest = nullptr; 2762 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 2763 2764 // Add an unconditional branch to make this look like the CallInst case... 2765 CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), &CB); 2766 2767 // Split the basic block. This guarantees that no PHI nodes will have to be 2768 // updated due to new incoming edges, and make the invoke case more 2769 // symmetric to the call case. 2770 AfterCallBB = 2771 OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), 2772 CalledFunc->getName() + ".exit"); 2773 2774 } else { // It's a call 2775 // If this is a call instruction, we need to split the basic block that 2776 // the call lives in. 2777 // 2778 AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(), 2779 CalledFunc->getName() + ".exit"); 2780 } 2781 2782 if (IFI.CallerBFI) { 2783 // Copy original BB's block frequency to AfterCallBB 2784 IFI.CallerBFI->setBlockFreq( 2785 AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); 2786 } 2787 2788 // Change the branch that used to go to AfterCallBB to branch to the first 2789 // basic block of the inlined function. 2790 // 2791 Instruction *Br = OrigBB->getTerminator(); 2792 assert(Br && Br->getOpcode() == Instruction::Br && 2793 "splitBasicBlock broken!"); 2794 Br->setOperand(0, &*FirstNewBlock); 2795 2796 // Now that the function is correct, make it a little bit nicer. In 2797 // particular, move the basic blocks inserted from the end of the function 2798 // into the space made by splitting the source basic block. 2799 Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock, 2800 Caller->end()); 2801 2802 // Handle all of the return instructions that we just cloned in, and eliminate 2803 // any users of the original call/invoke instruction. 2804 Type *RTy = CalledFunc->getReturnType(); 2805 2806 PHINode *PHI = nullptr; 2807 if (Returns.size() > 1) { 2808 // The PHI node should go at the front of the new basic block to merge all 2809 // possible incoming values. 2810 if (!CB.use_empty()) { 2811 PHI = PHINode::Create(RTy, Returns.size(), CB.getName(), 2812 &AfterCallBB->front()); 2813 // Anything that used the result of the function call should now use the 2814 // PHI node as their operand. 2815 CB.replaceAllUsesWith(PHI); 2816 } 2817 2818 // Loop over all of the return instructions adding entries to the PHI node 2819 // as appropriate. 2820 if (PHI) { 2821 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 2822 ReturnInst *RI = Returns[i]; 2823 assert(RI->getReturnValue()->getType() == PHI->getType() && 2824 "Ret value not consistent in function!"); 2825 PHI->addIncoming(RI->getReturnValue(), RI->getParent()); 2826 } 2827 } 2828 2829 // Add a branch to the merge points and remove return instructions. 
2830 DebugLoc Loc; 2831 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 2832 ReturnInst *RI = Returns[i]; 2833 BranchInst* BI = BranchInst::Create(AfterCallBB, RI); 2834 Loc = RI->getDebugLoc(); 2835 BI->setDebugLoc(Loc); 2836 RI->eraseFromParent(); 2837 } 2838 // We need to set the debug location to *somewhere* inside the 2839 // inlined function. The line number may be nonsensical, but the 2840 // instruction will at least be associated with the right 2841 // function. 2842 if (CreatedBranchToNormalDest) 2843 CreatedBranchToNormalDest->setDebugLoc(Loc); 2844 } else if (!Returns.empty()) { 2845 // Otherwise, if there is exactly one return value, just replace anything 2846 // using the return value of the call with the computed value. 2847 if (!CB.use_empty()) { 2848 if (&CB == Returns[0]->getReturnValue()) 2849 CB.replaceAllUsesWith(UndefValue::get(CB.getType())); 2850 else 2851 CB.replaceAllUsesWith(Returns[0]->getReturnValue()); 2852 } 2853 2854 // Update PHI nodes that use the ReturnBB to use the AfterCallBB. 2855 BasicBlock *ReturnBB = Returns[0]->getParent(); 2856 ReturnBB->replaceAllUsesWith(AfterCallBB); 2857 2858 // Splice the code from the return block into the block that it will return 2859 // to, which contains the code that was after the call. 2860 AfterCallBB->splice(AfterCallBB->begin(), ReturnBB); 2861 2862 if (CreatedBranchToNormalDest) 2863 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); 2864 2865 // Delete the return instruction now and empty ReturnBB now. 2866 Returns[0]->eraseFromParent(); 2867 ReturnBB->eraseFromParent(); 2868 } else if (!CB.use_empty()) { 2869 // No returns, but something is using the return value of the call. Just 2870 // nuke the result. 2871 CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); 2872 } 2873 2874 // Since we are now done with the Call/Invoke, we can delete it. 2875 CB.eraseFromParent(); 2876 2877 // If we inlined any musttail calls and the original return is now 2878 // unreachable, delete it. It can only contain a bitcast and ret. 2879 if (InlinedMustTailCalls && pred_empty(AfterCallBB)) 2880 AfterCallBB->eraseFromParent(); 2881 2882 // We should always be able to fold the entry block of the function into the 2883 // single predecessor of the block... 2884 assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); 2885 BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); 2886 2887 // Splice the code entry block into calling block, right before the 2888 // unconditional branch. 2889 CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes 2890 OrigBB->splice(Br->getIterator(), CalleeEntry); 2891 2892 // Remove the unconditional branch. 2893 Br->eraseFromParent(); 2894 2895 // Now we can remove the CalleeEntry block, which is now empty. 2896 CalleeEntry->eraseFromParent(); 2897 2898 // If we inserted a phi node, check to see if it has a single value (e.g. all 2899 // the entries are the same or undef). If so, remove the PHI so it doesn't 2900 // block other optimizations. 2901 if (PHI) { 2902 AssumptionCache *AC = 2903 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; 2904 auto &DL = Caller->getParent()->getDataLayout(); 2905 if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { 2906 PHI->replaceAllUsesWith(V); 2907 PHI->eraseFromParent(); 2908 } 2909 } 2910 2911 if (MergeAttributes) 2912 AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); 2913 2914 return InlineResult::success(); 2915 } 2916