//===- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but this is not currently
// implemented. This case would be best handled when and if LLVM begins
// supporting multiple return values from functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/ArgumentPromotion.h"

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
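// As a rough sketch of the transformation (hypothetical IR, not taken from
// the test suite; the ".0.val" suffix follows the naming scheme used by
// doPromotion below), a callee such as
//
//   define internal i32 @callee(ptr %p) {
//     %v = load i32, ptr %p
//     ret i32 %v
//   }
//
// would be rewritten to take the loaded value directly:
//
//   define internal i32 @callee(i32 %p.0.val) {
//     ret i32 %p.0.val
//   }
//
// Each call site is then rewritten to perform the load and pass the value.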
namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-to-execute load or store instruction for use
  /// by metadata transfer.
  Instruction *MustExecInstr;
};

using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace

static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  if (Offset != 0) {
    APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(APOffset));
  }
  return Ptr;
}

/// doPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Keep track of the parameter attributes for the arguments that we are *not*
  // promoting. For the ones that we do promote, the parameter attributes are
  // lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which is always marked as promotable).
      ++NumArgumentsDead;
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata needs
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  SmallVector<WeakTrackingVH, 16> DeadArgs;

  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEPs and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument.
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
            // Only transfer poison-generating metadata if we also have
            // !noundef.
            // TODO: Without !noundef, we could merge this metadata across
            // all promoted loads.
            if (LI->hasMetadata(LLVMContext::MD_noundef))
              LI->copyMetadata(*Pair.second.MustExecInstr,
                               {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                                LLVMContext::MD_align});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      } else {
        assert(ArgsToPromote.count(&*I) && I->use_empty());
        DeadArgs.emplace_back(AI->get());
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count
    // of F.
    CB.eraseFromParent();
  }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->splice(NF->begin(), F);

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, transferring the names over as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code (in
    // particular the store instructions, if they are allowed) a chance to work
    // as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, i.e. the parts from ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca so we can retarget the users to it.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Clean up the dead instructions: the GEPs and BitCasts between the
    // original argument and its end users, the loads and stores. Retarget
    // every user to the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // Promote the allocas into registers. Our earlier checks have ensured
    // that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}

/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only
  // have direct callers.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(
        CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL);
  });
}

/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
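/// For example (an illustrative case, not drawn from the test suite), an
/// argument whose only uses are an i32 load at offset 0 and an i64 load at
/// offset 8 would yield the two parts {0 -> i32} and {8 -> i64}.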
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of its uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (i.e., there is a load in the
  // entry block) or the pointer passed in at every call site is guaranteed to
  // be valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // If this is a byval argument, we also allow store instructions. We only
  // handle arguments with a specified alignment this way; if the alignment is
  // unspecified, the actual alignment of the argument is target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns std::nullopt if this load or store is not based on the argument,
  // true if we can promote the instruction, and false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> std::optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return std::nullopt;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements
    // of the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    std::optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments. Note that only stores TO the
    // argument are allowed; all other stores are unknown users.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the needed dereferenceability and alignment requirements.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());

  // Make sure the parts are non-overlapping.
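  // For example (illustrative), an i64 part at offset 0 covers bytes [0, 8),
  // so an i32 part at offset 4 would overlap it and block promotion.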
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc,
                                      ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// promoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If it is safe to promote
/// some arguments, it calls the doPromotion method.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation
  // may not be safe depending on the calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change the signature of a musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change the signature of a musttail caller.
  // FIXME: Support promoting whole chains of musttail functions.
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
  for (Argument *PtrArg : PointerArgs) {
    // Replace the sret attribute with noalias. This reduces register pressure
    // by avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // Check if we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        NumArgsAfterPromote += ArgParts.size() - 1;
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  if (NumArgsAfterPromote > TTI.getMaxNumArgs())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates; it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}