1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass promotes "by reference" arguments to be "by value" arguments. In 10 // practice, this means looking for internal functions that have pointer 11 // arguments. If it can prove, through the use of alias analysis, that an 12 // argument is *only* loaded, then it can pass the value into the function 13 // instead of the address of the value. This can cause recursive simplification 14 // of code and lead to the elimination of allocas (especially in C++ template 15 // code like the STL). 16 // 17 // This pass also handles aggregate arguments that are passed into a function, 18 // scalarizing them if the elements of the aggregate are only loaded. Note that 19 // by default it refuses to scalarize aggregates which would require passing in 20 // more than three operands to the function, because passing thousands of 21 // operands for a large array or structure is unprofitable! This limit can be 22 // configured or disabled, however. 23 // 24 // Note that this transformation could also be done for arguments that are only 25 // stored to (returning the value instead), but does not currently. This case 26 // would be best handled when and if LLVM begins supporting multiple return 27 // values from functions. 28 // 29 //===----------------------------------------------------------------------===// 30 31 #include "llvm/Transforms/IPO/ArgumentPromotion.h" 32 33 #include "llvm/ADT/DepthFirstIterator.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/ScopeExit.h" 36 #include "llvm/ADT/SmallPtrSet.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/ADT/Statistic.h" 39 #include "llvm/ADT/Twine.h" 40 #include "llvm/Analysis/AssumptionCache.h" 41 #include "llvm/Analysis/BasicAliasAnalysis.h" 42 #include "llvm/Analysis/CallGraph.h" 43 #include "llvm/Analysis/Loads.h" 44 #include "llvm/Analysis/MemoryLocation.h" 45 #include "llvm/Analysis/TargetTransformInfo.h" 46 #include "llvm/Analysis/ValueTracking.h" 47 #include "llvm/IR/Argument.h" 48 #include "llvm/IR/Attributes.h" 49 #include "llvm/IR/BasicBlock.h" 50 #include "llvm/IR/CFG.h" 51 #include "llvm/IR/Constants.h" 52 #include "llvm/IR/DataLayout.h" 53 #include "llvm/IR/DerivedTypes.h" 54 #include "llvm/IR/Dominators.h" 55 #include "llvm/IR/Function.h" 56 #include "llvm/IR/IRBuilder.h" 57 #include "llvm/IR/InstrTypes.h" 58 #include "llvm/IR/Instruction.h" 59 #include "llvm/IR/Instructions.h" 60 #include "llvm/IR/Metadata.h" 61 #include "llvm/IR/NoFolder.h" 62 #include "llvm/IR/PassManager.h" 63 #include "llvm/IR/Type.h" 64 #include "llvm/IR/Use.h" 65 #include "llvm/IR/User.h" 66 #include "llvm/IR/Value.h" 67 #include "llvm/Support/Casting.h" 68 #include "llvm/Support/Debug.h" 69 #include "llvm/Support/raw_ostream.h" 70 #include "llvm/Transforms/Utils/Local.h" 71 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 72 #include <algorithm> 73 #include <cassert> 74 #include <cstdint> 75 #include <utility> 76 #include <vector> 77 78 using namespace llvm; 79 80 #define DEBUG_TYPE "argpromotion" 81 82 STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); 83 STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); 84 85 namespace { 86 87 struct ArgPart { 88 Type *Ty; 89 Align Alignment; 90 /// A representative guaranteed-executed load or store instruction for use by 91 /// metadata transfer. 92 Instruction *MustExecInstr; 93 }; 94 95 using OffsetAndArgPart = std::pair<int64_t, ArgPart>; 96 97 } // end anonymous namespace 98 99 static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, 100 Value *Ptr, Type *ResElemTy, int64_t Offset) { 101 if (Offset != 0) { 102 APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset); 103 Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset)); 104 } 105 return Ptr; 106 } 107 108 /// DoPromotion - This method actually performs the promotion of the specified 109 /// arguments, and returns the new function. At this point, we know that it's 110 /// safe to do so. 111 static Function * 112 doPromotion(Function *F, FunctionAnalysisManager &FAM, 113 const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> 114 &ArgsToPromote) { 115 // Start by computing a new prototype for the function, which is the same as 116 // the old function, but has modified arguments. 117 FunctionType *FTy = F->getFunctionType(); 118 std::vector<Type *> Params; 119 120 // Attribute - Keep track of the parameter attributes for the arguments 121 // that we are *not* promoting. For the ones that we do promote, the parameter 122 // attributes are lost 123 SmallVector<AttributeSet, 8> ArgAttrVec; 124 // Mapping from old to new argument indices. -1 for promoted or removed 125 // arguments. 126 SmallVector<unsigned> NewArgIndices; 127 AttributeList PAL = F->getAttributes(); 128 129 // First, determine the new argument list 130 unsigned ArgNo = 0, NewArgNo = 0; 131 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; 132 ++I, ++ArgNo) { 133 if (!ArgsToPromote.count(&*I)) { 134 // Unchanged argument 135 Params.push_back(I->getType()); 136 ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo)); 137 NewArgIndices.push_back(NewArgNo++); 138 } else if (I->use_empty()) { 139 // Dead argument (which are always marked as promotable) 140 ++NumArgumentsDead; 141 NewArgIndices.push_back((unsigned)-1); 142 } else { 143 const auto &ArgParts = ArgsToPromote.find(&*I)->second; 144 for (const auto &Pair : ArgParts) { 145 Params.push_back(Pair.second.Ty); 146 ArgAttrVec.push_back(AttributeSet()); 147 } 148 ++NumArgumentsPromoted; 149 NewArgIndices.push_back((unsigned)-1); 150 NewArgNo += ArgParts.size(); 151 } 152 } 153 154 Type *RetTy = FTy->getReturnType(); 155 156 // Construct the new function type using the new arguments. 157 FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); 158 159 // Create the new function body and insert it into the module. 160 Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(), 161 F->getName()); 162 NF->copyAttributesFrom(F); 163 NF->copyMetadata(F, 0); 164 NF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat); 165 166 // The new function will have the !dbg metadata copied from the original 167 // function. The original function may not be deleted, and dbg metadata need 168 // to be unique, so we need to drop it. 169 F->setSubprogram(nullptr); 170 171 LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" 172 << "From: " << *F); 173 174 uint64_t LargestVectorWidth = 0; 175 for (auto *I : Params) 176 if (auto *VT = dyn_cast<llvm::VectorType>(I)) 177 LargestVectorWidth = std::max( 178 LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue()); 179 180 // Recompute the parameter attributes list based on the new arguments for 181 // the function. 182 NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(), 183 PAL.getRetAttrs(), ArgAttrVec)); 184 185 // Remap argument indices in allocsize attribute. 186 if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) { 187 unsigned Arg1 = NewArgIndices[AllocSize->first]; 188 assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument"); 189 std::optional<unsigned> Arg2; 190 if (AllocSize->second) { 191 Arg2 = NewArgIndices[*AllocSize->second]; 192 assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument"); 193 } 194 NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2)); 195 } 196 197 AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth); 198 ArgAttrVec.clear(); 199 200 F->getParent()->getFunctionList().insert(F->getIterator(), NF); 201 NF->takeName(F); 202 203 // Loop over all the callers of the function, transforming the call sites to 204 // pass in the loaded pointers. 205 SmallVector<Value *, 16> Args; 206 const DataLayout &DL = F->getParent()->getDataLayout(); 207 SmallVector<WeakTrackingVH, 16> DeadArgs; 208 209 while (!F->use_empty()) { 210 CallBase &CB = cast<CallBase>(*F->user_back()); 211 assert(CB.getCalledFunction() == F); 212 const AttributeList &CallPAL = CB.getAttributes(); 213 IRBuilder<NoFolder> IRB(&CB); 214 215 // Loop over the operands, inserting GEP and loads in the caller as 216 // appropriate. 217 auto *AI = CB.arg_begin(); 218 ArgNo = 0; 219 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; 220 ++I, ++AI, ++ArgNo) { 221 if (!ArgsToPromote.count(&*I)) { 222 Args.push_back(*AI); // Unmodified argument 223 ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); 224 } else if (!I->use_empty()) { 225 Value *V = *AI; 226 const auto &ArgParts = ArgsToPromote.find(&*I)->second; 227 for (const auto &Pair : ArgParts) { 228 LoadInst *LI = IRB.CreateAlignedLoad( 229 Pair.second.Ty, 230 createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first), 231 Pair.second.Alignment, V->getName() + ".val"); 232 if (Pair.second.MustExecInstr) { 233 LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata()); 234 LI->copyMetadata(*Pair.second.MustExecInstr, 235 {LLVMContext::MD_dereferenceable, 236 LLVMContext::MD_dereferenceable_or_null, 237 LLVMContext::MD_noundef, 238 LLVMContext::MD_nontemporal}); 239 // Only transfer poison-generating metadata if we also have 240 // !noundef. 241 // TODO: Without !noundef, we could merge this metadata across 242 // all promoted loads. 243 if (LI->hasMetadata(LLVMContext::MD_noundef)) 244 LI->copyMetadata(*Pair.second.MustExecInstr, 245 {LLVMContext::MD_range, LLVMContext::MD_nonnull, 246 LLVMContext::MD_align}); 247 } 248 Args.push_back(LI); 249 ArgAttrVec.push_back(AttributeSet()); 250 } 251 } else { 252 assert(ArgsToPromote.count(&*I) && I->use_empty()); 253 DeadArgs.emplace_back(AI->get()); 254 } 255 } 256 257 // Push any varargs arguments on the list. 258 for (; AI != CB.arg_end(); ++AI, ++ArgNo) { 259 Args.push_back(*AI); 260 ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); 261 } 262 263 SmallVector<OperandBundleDef, 1> OpBundles; 264 CB.getOperandBundlesAsDefs(OpBundles); 265 266 CallBase *NewCS = nullptr; 267 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 268 NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), 269 Args, OpBundles, "", &CB); 270 } else { 271 auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB); 272 NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind()); 273 NewCS = NewCall; 274 } 275 NewCS->setCallingConv(CB.getCallingConv()); 276 NewCS->setAttributes(AttributeList::get(F->getContext(), 277 CallPAL.getFnAttrs(), 278 CallPAL.getRetAttrs(), ArgAttrVec)); 279 NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); 280 Args.clear(); 281 ArgAttrVec.clear(); 282 283 AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(), 284 LargestVectorWidth); 285 286 if (!CB.use_empty()) { 287 CB.replaceAllUsesWith(NewCS); 288 NewCS->takeName(&CB); 289 } 290 291 // Finally, remove the old call from the program, reducing the use-count of 292 // F. 293 CB.eraseFromParent(); 294 } 295 296 RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs); 297 298 // Since we have now created the new function, splice the body of the old 299 // function right into the new function, leaving the old rotting hulk of the 300 // function empty. 301 NF->splice(NF->begin(), F); 302 303 // We will collect all the new created allocas to promote them into registers 304 // after the following loop 305 SmallVector<AllocaInst *, 4> Allocas; 306 307 // Loop over the argument list, transferring uses of the old arguments over to 308 // the new arguments, also transferring over the names as well. 309 Function::arg_iterator I2 = NF->arg_begin(); 310 for (Argument &Arg : F->args()) { 311 if (!ArgsToPromote.count(&Arg)) { 312 // If this is an unmodified argument, move the name and users over to the 313 // new version. 314 Arg.replaceAllUsesWith(&*I2); 315 I2->takeName(&Arg); 316 ++I2; 317 continue; 318 } 319 320 // There potentially are metadata uses for things like llvm.dbg.value. 321 // Replace them with undef, after handling the other regular uses. 322 auto RauwUndefMetadata = make_scope_exit( 323 [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); }); 324 325 if (Arg.use_empty()) 326 continue; 327 328 // Otherwise, if we promoted this argument, we have to create an alloca in 329 // the callee for every promotable part and store each of the new incoming 330 // arguments into the corresponding alloca, what lets the old code (the 331 // store instructions if they are allowed especially) a chance to work as 332 // before. 333 assert(Arg.getType()->isPointerTy() && 334 "Only arguments with a pointer type are promotable"); 335 336 IRBuilder<NoFolder> IRB(&NF->begin()->front()); 337 338 // Add only the promoted elements, so parts from ArgsToPromote 339 SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca; 340 for (const auto &Pair : ArgsToPromote.find(&Arg)->second) { 341 int64_t Offset = Pair.first; 342 const ArgPart &Part = Pair.second; 343 344 Argument *NewArg = I2++; 345 NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val"); 346 347 AllocaInst *NewAlloca = IRB.CreateAlloca( 348 Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc"); 349 NewAlloca->setAlignment(Pair.second.Alignment); 350 IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment); 351 352 // Collect the alloca to retarget the users to 353 OffsetToAlloca.insert({Offset, NewAlloca}); 354 } 355 356 auto GetAlloca = [&](Value *Ptr) { 357 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 358 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 359 /* AllowNonInbounds */ true); 360 assert(Ptr == &Arg && "Not constant offset from arg?"); 361 return OffsetToAlloca.lookup(Offset.getSExtValue()); 362 }; 363 364 // Cleanup the code from the dead instructions: GEPs and BitCasts in between 365 // the original argument and its users: loads and stores. Retarget every 366 // user to the new created alloca. 367 SmallVector<Value *, 16> Worklist; 368 SmallVector<Instruction *, 16> DeadInsts; 369 append_range(Worklist, Arg.users()); 370 while (!Worklist.empty()) { 371 Value *V = Worklist.pop_back_val(); 372 if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) { 373 DeadInsts.push_back(cast<Instruction>(V)); 374 append_range(Worklist, V->users()); 375 continue; 376 } 377 378 if (auto *LI = dyn_cast<LoadInst>(V)) { 379 Value *Ptr = LI->getPointerOperand(); 380 LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr)); 381 continue; 382 } 383 384 if (auto *SI = dyn_cast<StoreInst>(V)) { 385 assert(!SI->isVolatile() && "Volatile operations can't be promoted."); 386 Value *Ptr = SI->getPointerOperand(); 387 SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr)); 388 continue; 389 } 390 391 llvm_unreachable("Unexpected user"); 392 } 393 394 for (Instruction *I : DeadInsts) { 395 I->replaceAllUsesWith(PoisonValue::get(I->getType())); 396 I->eraseFromParent(); 397 } 398 399 // Collect the allocas for promotion 400 for (const auto &Pair : OffsetToAlloca) { 401 assert(isAllocaPromotable(Pair.second) && 402 "By design, only promotable allocas should be produced."); 403 Allocas.push_back(Pair.second); 404 } 405 } 406 407 LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size() 408 << " alloca(s) are promotable by Mem2Reg\n"); 409 410 if (!Allocas.empty()) { 411 // And we are able to call the `promoteMemoryToRegister()` function. 412 // Our earlier checks have ensured that PromoteMemToReg() will 413 // succeed. 414 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF); 415 auto &AC = FAM.getResult<AssumptionAnalysis>(*NF); 416 PromoteMemToReg(Allocas, DT, &AC); 417 } 418 419 return NF; 420 } 421 422 /// Return true if we can prove that all callees pass in a valid pointer for the 423 /// specified function argument. 424 static bool allCallersPassValidPointerForArgument(Argument *Arg, 425 Align NeededAlign, 426 uint64_t NeededDerefBytes) { 427 Function *Callee = Arg->getParent(); 428 const DataLayout &DL = Callee->getParent()->getDataLayout(); 429 APInt Bytes(64, NeededDerefBytes); 430 431 // Check if the argument itself is marked dereferenceable and aligned. 432 if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL)) 433 return true; 434 435 // Look at all call sites of the function. At this point we know we only have 436 // direct callees. 437 return all_of(Callee->users(), [&](User *U) { 438 CallBase &CB = cast<CallBase>(*U); 439 return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()), 440 NeededAlign, Bytes, DL); 441 }); 442 } 443 444 /// Determine that this argument is safe to promote, and find the argument 445 /// parts it can be promoted into. 446 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, 447 unsigned MaxElements, bool IsRecursive, 448 SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) { 449 // Quick exit for unused arguments 450 if (Arg->use_empty()) 451 return true; 452 453 // We can only promote this argument if all the uses are loads at known 454 // offsets. 455 // 456 // Promoting the argument causes it to be loaded in the caller 457 // unconditionally. This is only safe if we can prove that either the load 458 // would have happened in the callee anyway (ie, there is a load in the entry 459 // block) or the pointer passed in at every call site is guaranteed to be 460 // valid. 461 // In the former case, invalid loads can happen, but would have happened 462 // anyway, in the latter case, invalid loads won't happen. This prevents us 463 // from introducing an invalid load that wouldn't have happened in the 464 // original code. 465 466 SmallDenseMap<int64_t, ArgPart, 4> ArgParts; 467 Align NeededAlign(1); 468 uint64_t NeededDerefBytes = 0; 469 470 // And if this is a byval argument we also allow to have store instructions. 471 // Only handle in such way arguments with specified alignment; 472 // if it's unspecified, the actual alignment of the argument is 473 // target-specific. 474 bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); 475 476 // An end user of a pointer argument is a load or store instruction. 477 // Returns std::nullopt if this load or store is not based on the argument. 478 // Return true if we can promote the instruction, false otherwise. 479 auto HandleEndUser = [&](auto *I, Type *Ty, 480 bool GuaranteedToExecute) -> std::optional<bool> { 481 // Don't promote volatile or atomic instructions. 482 if (!I->isSimple()) 483 return false; 484 485 Value *Ptr = I->getPointerOperand(); 486 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 487 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 488 /* AllowNonInbounds */ true); 489 if (Ptr != Arg) 490 return std::nullopt; 491 492 if (Offset.getSignificantBits() >= 64) 493 return false; 494 495 TypeSize Size = DL.getTypeStoreSize(Ty); 496 // Don't try to promote scalable types. 497 if (Size.isScalable()) 498 return false; 499 500 // If this is a recursive function and one of the types is a pointer, 501 // then promoting it might lead to recursive promotion. 502 if (IsRecursive && Ty->isPointerTy()) 503 return false; 504 505 int64_t Off = Offset.getSExtValue(); 506 auto Pair = ArgParts.try_emplace( 507 Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr}); 508 ArgPart &Part = Pair.first->second; 509 bool OffsetNotSeenBefore = Pair.second; 510 511 // We limit promotion to only promoting up to a fixed number of elements of 512 // the aggregate. 513 if (MaxElements > 0 && ArgParts.size() > MaxElements) { 514 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 515 << "more than " << MaxElements << " parts\n"); 516 return false; 517 } 518 519 // For now, we only support loading/storing one specific type at a given 520 // offset. 521 if (Part.Ty != Ty) { 522 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 523 << "accessed as both " << *Part.Ty << " and " << *Ty 524 << " at offset " << Off << "\n"); 525 return false; 526 } 527 528 // If this instruction is not guaranteed to execute, and we haven't seen a 529 // load or store at this offset before (or it had lower alignment), then we 530 // need to remember that requirement. 531 // Note that skipping instructions of previously seen offsets is only 532 // correct because we only allow a single type for a given offset, which 533 // also means that the number of accessed bytes will be the same. 534 if (!GuaranteedToExecute && 535 (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { 536 // We won't be able to prove dereferenceability for negative offsets. 537 if (Off < 0) 538 return false; 539 540 // If the offset is not aligned, an aligned base pointer won't help. 541 if (!isAligned(I->getAlign(), Off)) 542 return false; 543 544 NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue()); 545 NeededAlign = std::max(NeededAlign, I->getAlign()); 546 } 547 548 Part.Alignment = std::max(Part.Alignment, I->getAlign()); 549 return true; 550 }; 551 552 // Look for loads and stores that are guaranteed to execute on entry. 553 for (Instruction &I : Arg->getParent()->getEntryBlock()) { 554 std::optional<bool> Res{}; 555 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) 556 Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true); 557 else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) 558 Res = HandleEndUser(SI, SI->getValueOperand()->getType(), 559 /* GuaranteedToExecute */ true); 560 if (Res && !*Res) 561 return false; 562 563 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 564 break; 565 } 566 567 // Now look at all loads of the argument. Remember the load instructions 568 // for the aliasing check below. 569 SmallVector<const Use *, 16> Worklist; 570 SmallPtrSet<const Use *, 16> Visited; 571 SmallVector<LoadInst *, 16> Loads; 572 auto AppendUses = [&](const Value *V) { 573 for (const Use &U : V->uses()) 574 if (Visited.insert(&U).second) 575 Worklist.push_back(&U); 576 }; 577 AppendUses(Arg); 578 while (!Worklist.empty()) { 579 const Use *U = Worklist.pop_back_val(); 580 Value *V = U->getUser(); 581 if (isa<BitCastInst>(V)) { 582 AppendUses(V); 583 continue; 584 } 585 586 if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { 587 if (!GEP->hasAllConstantIndices()) 588 return false; 589 AppendUses(V); 590 continue; 591 } 592 593 if (auto *LI = dyn_cast<LoadInst>(V)) { 594 if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) 595 return false; 596 Loads.push_back(LI); 597 continue; 598 } 599 600 // Stores are allowed for byval arguments 601 auto *SI = dyn_cast<StoreInst>(V); 602 if (AreStoresAllowed && SI && 603 U->getOperandNo() == StoreInst::getPointerOperandIndex()) { 604 if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), 605 /* GuaranteedToExecute */ false)) 606 return false; 607 continue; 608 // Only stores TO the argument is allowed, all the other stores are 609 // unknown users 610 } 611 612 // Unknown user. 613 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 614 << "unknown user " << *V << "\n"); 615 return false; 616 } 617 618 if (NeededDerefBytes || NeededAlign > 1) { 619 // Try to prove a required deref / aligned requirement. 620 if (!allCallersPassValidPointerForArgument(Arg, NeededAlign, 621 NeededDerefBytes)) { 622 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 623 << "not dereferenceable or aligned\n"); 624 return false; 625 } 626 } 627 628 if (ArgParts.empty()) 629 return true; // No users, this is a dead argument. 630 631 // Sort parts by offset. 632 append_range(ArgPartsVec, ArgParts); 633 sort(ArgPartsVec, llvm::less_first()); 634 635 // Make sure the parts are non-overlapping. 636 int64_t Offset = ArgPartsVec[0].first; 637 for (const auto &Pair : ArgPartsVec) { 638 if (Pair.first < Offset) 639 return false; // Overlap with previous part. 640 641 Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty); 642 } 643 644 // If store instructions are allowed, the path from the entry of the function 645 // to each load may be not free of instructions that potentially invalidate 646 // the load, and this is an admissible situation. 647 if (AreStoresAllowed) 648 return true; 649 650 // Okay, now we know that the argument is only used by load instructions, and 651 // it is safe to unconditionally perform all of them. Use alias analysis to 652 // check to see if the pointer is guaranteed to not be modified from entry of 653 // the function to each of the load instructions. 654 655 for (LoadInst *Load : Loads) { 656 // Check to see if the load is invalidated from the start of the block to 657 // the load itself. 658 BasicBlock *BB = Load->getParent(); 659 660 MemoryLocation Loc = MemoryLocation::get(Load); 661 if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod)) 662 return false; // Pointer is invalidated! 663 664 // Now check every path from the entry block to the load for transparency. 665 // To do this, we perform a depth first search on the inverse CFG from the 666 // loading block. 667 for (BasicBlock *P : predecessors(BB)) { 668 for (BasicBlock *TranspBB : inverse_depth_first(P)) 669 if (AAR.canBasicBlockModify(*TranspBB, Loc)) 670 return false; 671 } 672 } 673 674 // If the path from the entry of the function to each load is free of 675 // instructions that potentially invalidate the load, we can make the 676 // transformation! 677 return true; 678 } 679 680 /// Check if callers and callee agree on how promoted arguments would be 681 /// passed. 682 static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, 683 const TargetTransformInfo &TTI) { 684 return all_of(F.uses(), [&](const Use &U) { 685 CallBase *CB = dyn_cast<CallBase>(U.getUser()); 686 if (!CB) 687 return false; 688 689 const Function *Caller = CB->getCaller(); 690 const Function *Callee = CB->getCalledFunction(); 691 return TTI.areTypesABICompatible(Caller, Callee, Types); 692 }); 693 } 694 695 /// PromoteArguments - This method checks the specified function to see if there 696 /// are any promotable arguments and if it is safe to promote the function (for 697 /// example, all callers are direct). If safe to promote some arguments, it 698 /// calls the DoPromotion method. 699 static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, 700 unsigned MaxElements, bool IsRecursive) { 701 // Don't perform argument promotion for naked functions; otherwise we can end 702 // up removing parameters that are seemingly 'not used' as they are referred 703 // to in the assembly. 704 if (F->hasFnAttribute(Attribute::Naked)) 705 return nullptr; 706 707 // Make sure that it is local to this module. 708 if (!F->hasLocalLinkage()) 709 return nullptr; 710 711 // Don't promote arguments for variadic functions. Adding, removing, or 712 // changing non-pack parameters can change the classification of pack 713 // parameters. Frontends encode that classification at the call site in the 714 // IR, while in the callee the classification is determined dynamically based 715 // on the number of registers consumed so far. 716 if (F->isVarArg()) 717 return nullptr; 718 719 // Don't transform functions that receive inallocas, as the transformation may 720 // not be safe depending on calling convention. 721 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca)) 722 return nullptr; 723 724 // First check: see if there are any pointer arguments! If not, quick exit. 725 SmallVector<Argument *, 16> PointerArgs; 726 for (Argument &I : F->args()) 727 if (I.getType()->isPointerTy()) 728 PointerArgs.push_back(&I); 729 if (PointerArgs.empty()) 730 return nullptr; 731 732 // Second check: make sure that all callers are direct callers. We can't 733 // transform functions that have indirect callers. Also see if the function 734 // is self-recursive. 735 for (Use &U : F->uses()) { 736 CallBase *CB = dyn_cast<CallBase>(U.getUser()); 737 // Must be a direct call. 738 if (CB == nullptr || !CB->isCallee(&U) || 739 CB->getFunctionType() != F->getFunctionType()) 740 return nullptr; 741 742 // Can't change signature of musttail callee 743 if (CB->isMustTailCall()) 744 return nullptr; 745 746 if (CB->getFunction() == F) 747 IsRecursive = true; 748 } 749 750 // Can't change signature of musttail caller 751 // FIXME: Support promoting whole chain of musttail functions 752 for (BasicBlock &BB : *F) 753 if (BB.getTerminatingMustTailCall()) 754 return nullptr; 755 756 const DataLayout &DL = F->getParent()->getDataLayout(); 757 auto &AAR = FAM.getResult<AAManager>(*F); 758 const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F); 759 760 // Check to see which arguments are promotable. If an argument is promotable, 761 // add it to ArgsToPromote. 762 DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; 763 unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams(); 764 for (Argument *PtrArg : PointerArgs) { 765 // Replace sret attribute with noalias. This reduces register pressure by 766 // avoiding a register copy. 767 if (PtrArg->hasStructRetAttr()) { 768 unsigned ArgNo = PtrArg->getArgNo(); 769 F->removeParamAttr(ArgNo, Attribute::StructRet); 770 F->addParamAttr(ArgNo, Attribute::NoAlias); 771 for (Use &U : F->uses()) { 772 CallBase &CB = cast<CallBase>(*U.getUser()); 773 CB.removeParamAttr(ArgNo, Attribute::StructRet); 774 CB.addParamAttr(ArgNo, Attribute::NoAlias); 775 } 776 } 777 778 // If we can promote the pointer to its value. 779 SmallVector<OffsetAndArgPart, 4> ArgParts; 780 781 if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { 782 SmallVector<Type *, 4> Types; 783 for (const auto &Pair : ArgParts) 784 Types.push_back(Pair.second.Ty); 785 786 if (areTypesABICompatible(Types, *F, TTI)) { 787 NumArgsAfterPromote += ArgParts.size() - 1; 788 ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); 789 } 790 } 791 } 792 793 // No promotable pointer arguments. 794 if (ArgsToPromote.empty()) 795 return nullptr; 796 797 if (NumArgsAfterPromote > TTI.getMaxNumArgs()) 798 return nullptr; 799 800 return doPromotion(F, FAM, ArgsToPromote); 801 } 802 803 PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, 804 CGSCCAnalysisManager &AM, 805 LazyCallGraph &CG, 806 CGSCCUpdateResult &UR) { 807 bool Changed = false, LocalChange; 808 809 // Iterate until we stop promoting from this SCC. 810 do { 811 LocalChange = false; 812 813 FunctionAnalysisManager &FAM = 814 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 815 816 bool IsRecursive = C.size() > 1; 817 for (LazyCallGraph::Node &N : C) { 818 Function &OldF = N.getFunction(); 819 Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive); 820 if (!NewF) 821 continue; 822 LocalChange = true; 823 824 // Directly substitute the functions in the call graph. Note that this 825 // requires the old function to be completely dead and completely 826 // replaced by the new function. It does no call graph updates, it merely 827 // swaps out the particular function mapped to a particular node in the 828 // graph. 829 C.getOuterRefSCC().replaceNodeFunction(N, *NewF); 830 FAM.clear(OldF, OldF.getName()); 831 OldF.eraseFromParent(); 832 833 PreservedAnalyses FuncPA; 834 FuncPA.preserveSet<CFGAnalyses>(); 835 for (auto *U : NewF->users()) { 836 auto *UserF = cast<CallBase>(U)->getFunction(); 837 FAM.invalidate(*UserF, FuncPA); 838 } 839 } 840 841 Changed |= LocalChange; 842 } while (LocalChange); 843 844 if (!Changed) 845 return PreservedAnalyses::all(); 846 847 PreservedAnalyses PA; 848 // We've cleared out analyses for deleted functions. 849 PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); 850 // We've manually invalidated analyses for functions we've modified. 851 PA.preserveSet<AllAnalysesOn<Function>>(); 852 return PA; 853 } 854