1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass promotes "by reference" arguments to be "by value" arguments. In 10 // practice, this means looking for internal functions that have pointer 11 // arguments. If it can prove, through the use of alias analysis, that an 12 // argument is *only* loaded, then it can pass the value into the function 13 // instead of the address of the value. This can cause recursive simplification 14 // of code and lead to the elimination of allocas (especially in C++ template 15 // code like the STL). 16 // 17 // This pass also handles aggregate arguments that are passed into a function, 18 // scalarizing them if the elements of the aggregate are only loaded. Note that 19 // by default it refuses to scalarize aggregates which would require passing in 20 // more than three operands to the function, because passing thousands of 21 // operands for a large array or structure is unprofitable! This limit can be 22 // configured or disabled, however. 23 // 24 // Note that this transformation could also be done for arguments that are only 25 // stored to (returning the value instead), but does not currently. This case 26 // would be best handled when and if LLVM begins supporting multiple return 27 // values from functions. 28 // 29 //===----------------------------------------------------------------------===// 30 31 #include "llvm/Transforms/IPO/ArgumentPromotion.h" 32 33 #include "llvm/ADT/DepthFirstIterator.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/ScopeExit.h" 36 #include "llvm/ADT/SmallPtrSet.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/ADT/Statistic.h" 39 #include "llvm/ADT/Twine.h" 40 #include "llvm/Analysis/AssumptionCache.h" 41 #include "llvm/Analysis/BasicAliasAnalysis.h" 42 #include "llvm/Analysis/CallGraph.h" 43 #include "llvm/Analysis/Loads.h" 44 #include "llvm/Analysis/MemoryLocation.h" 45 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 46 #include "llvm/Analysis/TargetTransformInfo.h" 47 #include "llvm/Analysis/ValueTracking.h" 48 #include "llvm/IR/Argument.h" 49 #include "llvm/IR/Attributes.h" 50 #include "llvm/IR/BasicBlock.h" 51 #include "llvm/IR/CFG.h" 52 #include "llvm/IR/Constants.h" 53 #include "llvm/IR/DataLayout.h" 54 #include "llvm/IR/DerivedTypes.h" 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Function.h" 57 #include "llvm/IR/IRBuilder.h" 58 #include "llvm/IR/InstrTypes.h" 59 #include "llvm/IR/Instruction.h" 60 #include "llvm/IR/Instructions.h" 61 #include "llvm/IR/Metadata.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/NoFolder.h" 64 #include "llvm/IR/PassManager.h" 65 #include "llvm/IR/Type.h" 66 #include "llvm/IR/Use.h" 67 #include "llvm/IR/User.h" 68 #include "llvm/IR/Value.h" 69 #include "llvm/Support/Casting.h" 70 #include "llvm/Support/Debug.h" 71 #include "llvm/Support/raw_ostream.h" 72 #include "llvm/Transforms/Utils/Local.h" 73 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 74 #include <algorithm> 75 #include <cassert> 76 #include <cstdint> 77 #include <utility> 78 #include <vector> 79 80 using namespace llvm; 81 82 #define DEBUG_TYPE "argpromotion" 83 84 STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); 85 STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); 86 87 namespace { 88 89 struct ArgPart { 90 Type *Ty; 91 Align Alignment; 92 /// A representative guaranteed-executed load or store instruction for use by 93 /// metadata transfer. 94 Instruction *MustExecInstr; 95 }; 96 97 using OffsetAndArgPart = std::pair<int64_t, ArgPart>; 98 99 } // end anonymous namespace 100 101 static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, 102 Value *Ptr, Type *ResElemTy, int64_t Offset) { 103 if (Offset != 0) { 104 APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset, 105 /*isSigned=*/true); 106 Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset)); 107 } 108 return Ptr; 109 } 110 111 /// DoPromotion - This method actually performs the promotion of the specified 112 /// arguments, and returns the new function. At this point, we know that it's 113 /// safe to do so. 114 static Function * 115 doPromotion(Function *F, FunctionAnalysisManager &FAM, 116 const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> 117 &ArgsToPromote) { 118 // Start by computing a new prototype for the function, which is the same as 119 // the old function, but has modified arguments. 120 FunctionType *FTy = F->getFunctionType(); 121 std::vector<Type *> Params; 122 123 // Attribute - Keep track of the parameter attributes for the arguments 124 // that we are *not* promoting. For the ones that we do promote, the parameter 125 // attributes are lost 126 SmallVector<AttributeSet, 8> ArgAttrVec; 127 // Mapping from old to new argument indices. -1 for promoted or removed 128 // arguments. 129 SmallVector<unsigned> NewArgIndices; 130 AttributeList PAL = F->getAttributes(); 131 OptimizationRemarkEmitter ORE(F); 132 133 // First, determine the new argument list 134 unsigned ArgNo = 0, NewArgNo = 0; 135 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; 136 ++I, ++ArgNo) { 137 auto It = ArgsToPromote.find(&*I); 138 if (It == ArgsToPromote.end()) { 139 // Unchanged argument 140 Params.push_back(I->getType()); 141 ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo)); 142 NewArgIndices.push_back(NewArgNo++); 143 } else if (I->use_empty()) { 144 // Dead argument (which are always marked as promotable) 145 ++NumArgumentsDead; 146 ORE.emit([&]() { 147 return OptimizationRemark(DEBUG_TYPE, "ArgumentRemoved", F) 148 << "eliminating argument " << ore::NV("ArgName", I->getName()) 149 << "(" << ore::NV("ArgIndex", ArgNo) << ")"; 150 }); 151 152 NewArgIndices.push_back((unsigned)-1); 153 } else { 154 const auto &ArgParts = It->second; 155 for (const auto &Pair : ArgParts) { 156 Params.push_back(Pair.second.Ty); 157 ArgAttrVec.push_back(AttributeSet()); 158 } 159 ++NumArgumentsPromoted; 160 ORE.emit([&]() { 161 return OptimizationRemark(DEBUG_TYPE, "ArgumentPromoted", F) 162 << "promoting argument " << ore::NV("ArgName", I->getName()) 163 << "(" << ore::NV("ArgIndex", ArgNo) << ")" 164 << " to pass by value"; 165 }); 166 167 NewArgIndices.push_back((unsigned)-1); 168 NewArgNo += ArgParts.size(); 169 } 170 } 171 172 Type *RetTy = FTy->getReturnType(); 173 174 // Construct the new function type using the new arguments. 175 FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); 176 177 // Create the new function body and insert it into the module. 178 Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(), 179 F->getName()); 180 NF->copyAttributesFrom(F); 181 NF->copyMetadata(F, 0); 182 183 // The new function will have the !dbg metadata copied from the original 184 // function. The original function may not be deleted, and dbg metadata need 185 // to be unique, so we need to drop it. 186 F->setSubprogram(nullptr); 187 188 LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" 189 << "From: " << *F); 190 191 uint64_t LargestVectorWidth = 0; 192 for (auto *I : Params) 193 if (auto *VT = dyn_cast<llvm::VectorType>(I)) 194 LargestVectorWidth = std::max( 195 LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue()); 196 197 // Recompute the parameter attributes list based on the new arguments for 198 // the function. 199 NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(), 200 PAL.getRetAttrs(), ArgAttrVec)); 201 202 // Remap argument indices in allocsize attribute. 203 if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) { 204 unsigned Arg1 = NewArgIndices[AllocSize->first]; 205 assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument"); 206 std::optional<unsigned> Arg2; 207 if (AllocSize->second) { 208 Arg2 = NewArgIndices[*AllocSize->second]; 209 assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument"); 210 } 211 NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2)); 212 } 213 214 AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth); 215 ArgAttrVec.clear(); 216 217 F->getParent()->getFunctionList().insert(F->getIterator(), NF); 218 NF->takeName(F); 219 220 // Loop over all the callers of the function, transforming the call sites to 221 // pass in the loaded pointers. 222 SmallVector<Value *, 16> Args; 223 const DataLayout &DL = F->getDataLayout(); 224 SmallVector<WeakTrackingVH, 16> DeadArgs; 225 226 while (!F->use_empty()) { 227 CallBase &CB = cast<CallBase>(*F->user_back()); 228 assert(CB.getCalledFunction() == F); 229 const AttributeList &CallPAL = CB.getAttributes(); 230 IRBuilder<NoFolder> IRB(&CB); 231 232 // Loop over the operands, inserting GEP and loads in the caller as 233 // appropriate. 234 auto *AI = CB.arg_begin(); 235 ArgNo = 0; 236 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; 237 ++I, ++AI, ++ArgNo) { 238 auto ArgIt = ArgsToPromote.find(&*I); 239 if (ArgIt == ArgsToPromote.end()) { 240 Args.push_back(*AI); // Unmodified argument 241 ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); 242 } else if (!I->use_empty()) { 243 Value *V = *AI; 244 for (const auto &Pair : ArgIt->second) { 245 LoadInst *LI = IRB.CreateAlignedLoad( 246 Pair.second.Ty, 247 createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first), 248 Pair.second.Alignment, V->getName() + ".val"); 249 if (Pair.second.MustExecInstr) { 250 LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata()); 251 LI->copyMetadata(*Pair.second.MustExecInstr, 252 {LLVMContext::MD_dereferenceable, 253 LLVMContext::MD_dereferenceable_or_null, 254 LLVMContext::MD_noundef, 255 LLVMContext::MD_nontemporal}); 256 // Only transfer poison-generating metadata if we also have 257 // !noundef. 258 // TODO: Without !noundef, we could merge this metadata across 259 // all promoted loads. 260 if (LI->hasMetadata(LLVMContext::MD_noundef)) 261 LI->copyMetadata(*Pair.second.MustExecInstr, 262 Metadata::PoisonGeneratingIDs); 263 } 264 Args.push_back(LI); 265 ArgAttrVec.push_back(AttributeSet()); 266 } 267 } else { 268 assert(I->use_empty()); 269 DeadArgs.emplace_back(AI->get()); 270 } 271 } 272 273 // Push any varargs arguments on the list. 274 for (; AI != CB.arg_end(); ++AI, ++ArgNo) { 275 Args.push_back(*AI); 276 ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); 277 } 278 279 SmallVector<OperandBundleDef, 1> OpBundles; 280 CB.getOperandBundlesAsDefs(OpBundles); 281 282 CallBase *NewCS = nullptr; 283 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 284 NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), 285 Args, OpBundles, "", CB.getIterator()); 286 } else { 287 auto *NewCall = 288 CallInst::Create(NF, Args, OpBundles, "", CB.getIterator()); 289 NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind()); 290 NewCS = NewCall; 291 } 292 NewCS->setCallingConv(CB.getCallingConv()); 293 NewCS->setAttributes(AttributeList::get(F->getContext(), 294 CallPAL.getFnAttrs(), 295 CallPAL.getRetAttrs(), ArgAttrVec)); 296 NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); 297 Args.clear(); 298 ArgAttrVec.clear(); 299 300 AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(), 301 LargestVectorWidth); 302 303 if (!CB.use_empty()) { 304 CB.replaceAllUsesWith(NewCS); 305 NewCS->takeName(&CB); 306 } 307 308 // Finally, remove the old call from the program, reducing the use-count of 309 // F. 310 CB.eraseFromParent(); 311 } 312 313 RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs); 314 315 // Since we have now created the new function, splice the body of the old 316 // function right into the new function, leaving the old rotting hulk of the 317 // function empty. 318 NF->splice(NF->begin(), F); 319 320 // We will collect all the new created allocas to promote them into registers 321 // after the following loop 322 SmallVector<AllocaInst *, 4> Allocas; 323 324 // Loop over the argument list, transferring uses of the old arguments over to 325 // the new arguments, also transferring over the names as well. 326 Function::arg_iterator I2 = NF->arg_begin(); 327 for (Argument &Arg : F->args()) { 328 if (!ArgsToPromote.count(&Arg)) { 329 // If this is an unmodified argument, move the name and users over to the 330 // new version. 331 Arg.replaceAllUsesWith(&*I2); 332 I2->takeName(&Arg); 333 ++I2; 334 continue; 335 } 336 337 // There potentially are metadata uses for things like llvm.dbg.value. 338 // Replace them with poison, after handling the other regular uses. 339 auto RauwPoisonMetadata = make_scope_exit( 340 [&]() { Arg.replaceAllUsesWith(PoisonValue::get(Arg.getType())); }); 341 342 if (Arg.use_empty()) 343 continue; 344 345 // Otherwise, if we promoted this argument, we have to create an alloca in 346 // the callee for every promotable part and store each of the new incoming 347 // arguments into the corresponding alloca, what lets the old code (the 348 // store instructions if they are allowed especially) a chance to work as 349 // before. 350 assert(Arg.getType()->isPointerTy() && 351 "Only arguments with a pointer type are promotable"); 352 353 IRBuilder<NoFolder> IRB(&NF->begin()->front()); 354 355 // Add only the promoted elements, so parts from ArgsToPromote 356 SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca; 357 for (const auto &Pair : ArgsToPromote.find(&Arg)->second) { 358 int64_t Offset = Pair.first; 359 const ArgPart &Part = Pair.second; 360 361 Argument *NewArg = I2++; 362 NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val"); 363 364 AllocaInst *NewAlloca = IRB.CreateAlloca( 365 Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc"); 366 NewAlloca->setAlignment(Pair.second.Alignment); 367 IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment); 368 369 // Collect the alloca to retarget the users to 370 OffsetToAlloca.insert({Offset, NewAlloca}); 371 } 372 373 auto GetAlloca = [&](Value *Ptr) { 374 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 375 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 376 /* AllowNonInbounds */ true); 377 assert(Ptr == &Arg && "Not constant offset from arg?"); 378 return OffsetToAlloca.lookup(Offset.getSExtValue()); 379 }; 380 381 // Cleanup the code from the dead instructions: GEPs and BitCasts in between 382 // the original argument and its users: loads and stores. Retarget every 383 // user to the new created alloca. 384 SmallVector<Value *, 16> Worklist(Arg.users()); 385 SmallVector<Instruction *, 16> DeadInsts; 386 while (!Worklist.empty()) { 387 Value *V = Worklist.pop_back_val(); 388 if (isa<GetElementPtrInst>(V)) { 389 DeadInsts.push_back(cast<Instruction>(V)); 390 append_range(Worklist, V->users()); 391 continue; 392 } 393 394 if (auto *LI = dyn_cast<LoadInst>(V)) { 395 Value *Ptr = LI->getPointerOperand(); 396 LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr)); 397 continue; 398 } 399 400 if (auto *SI = dyn_cast<StoreInst>(V)) { 401 assert(!SI->isVolatile() && "Volatile operations can't be promoted."); 402 Value *Ptr = SI->getPointerOperand(); 403 SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr)); 404 continue; 405 } 406 407 llvm_unreachable("Unexpected user"); 408 } 409 410 for (Instruction *I : DeadInsts) { 411 I->replaceAllUsesWith(PoisonValue::get(I->getType())); 412 I->eraseFromParent(); 413 } 414 415 // Collect the allocas for promotion 416 for (const auto &Pair : OffsetToAlloca) { 417 assert(isAllocaPromotable(Pair.second) && 418 "By design, only promotable allocas should be produced."); 419 Allocas.push_back(Pair.second); 420 } 421 } 422 423 LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size() 424 << " alloca(s) are promotable by Mem2Reg\n"); 425 426 if (!Allocas.empty()) { 427 // And we are able to call the `promoteMemoryToRegister()` function. 428 // Our earlier checks have ensured that PromoteMemToReg() will 429 // succeed. 430 auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF); 431 auto &AC = FAM.getResult<AssumptionAnalysis>(*NF); 432 PromoteMemToReg(Allocas, DT, &AC); 433 } 434 435 return NF; 436 } 437 438 /// Return true if we can prove that all callees pass in a valid pointer for the 439 /// specified function argument. 440 static bool allCallersPassValidPointerForArgument( 441 Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls, 442 Align NeededAlign, uint64_t NeededDerefBytes) { 443 Function *Callee = Arg->getParent(); 444 const DataLayout &DL = Callee->getDataLayout(); 445 APInt Bytes(64, NeededDerefBytes); 446 447 // Check if the argument itself is marked dereferenceable and aligned. 448 if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL)) 449 return true; 450 451 // Look at all call sites of the function. At this point we know we only have 452 // direct callees. 453 return all_of(Callee->users(), [&](User *U) { 454 CallBase &CB = cast<CallBase>(*U); 455 // In case of functions with recursive calls, this check 456 // (isDereferenceableAndAlignedPointer) will fail when it tries to look at 457 // the first caller of this function. The caller may or may not have a load, 458 // incase it doesn't load the pointer being passed, this check will fail. 459 // So, it's safe to skip the check incase we know that we are dealing with a 460 // recursive call. For example we have a IR given below. 461 // 462 // def fun(ptr %a) { 463 // ... 464 // %loadres = load i32, ptr %a, align 4 465 // %res = call i32 @fun(ptr %a) 466 // ... 467 // } 468 // 469 // def bar(ptr %x) { 470 // ... 471 // %resbar = call i32 @fun(ptr %x) 472 // ... 473 // } 474 // 475 // Since we record processed recursive calls, we check if the current 476 // CallBase has been processed before. If yes it means that it is a 477 // recursive call and we can skip the check just for this call. So, just 478 // return true. 479 if (RecursiveCalls.contains(&CB)) 480 return true; 481 482 return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()), 483 NeededAlign, Bytes, DL); 484 }); 485 } 486 487 // Try to prove that all Calls to F do not modify the memory pointed to by Arg, 488 // using alias analysis local to each caller of F. 489 static bool isArgUnmodifiedByAllCalls(Argument *Arg, 490 FunctionAnalysisManager &FAM) { 491 for (User *U : Arg->getParent()->users()) { 492 493 auto *Call = cast<CallBase>(U); 494 495 MemoryLocation Loc = 496 MemoryLocation::getForArgument(Call, Arg->getArgNo(), nullptr); 497 498 AAResults &AAR = FAM.getResult<AAManager>(*Call->getFunction()); 499 // Bail as soon as we find a Call where Arg may be modified. 500 if (isModSet(AAR.getModRefInfo(Call, Loc))) 501 return false; 502 } 503 504 // All Users are Calls which do not modify the Arg. 505 return true; 506 } 507 508 /// Determine that this argument is safe to promote, and find the argument 509 /// parts it can be promoted into. 510 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, 511 unsigned MaxElements, bool IsRecursive, 512 SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec, 513 FunctionAnalysisManager &FAM) { 514 // Quick exit for unused arguments 515 if (Arg->use_empty()) 516 return true; 517 518 // We can only promote this argument if all the uses are loads at known 519 // offsets. 520 // 521 // Promoting the argument causes it to be loaded in the caller 522 // unconditionally. This is only safe if we can prove that either the load 523 // would have happened in the callee anyway (ie, there is a load in the entry 524 // block) or the pointer passed in at every call site is guaranteed to be 525 // valid. 526 // In the former case, invalid loads can happen, but would have happened 527 // anyway, in the latter case, invalid loads won't happen. This prevents us 528 // from introducing an invalid load that wouldn't have happened in the 529 // original code. 530 531 SmallDenseMap<int64_t, ArgPart, 4> ArgParts; 532 Align NeededAlign(1); 533 uint64_t NeededDerefBytes = 0; 534 535 // And if this is a byval argument we also allow to have store instructions. 536 // Only handle in such way arguments with specified alignment; 537 // if it's unspecified, the actual alignment of the argument is 538 // target-specific. 539 bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); 540 541 // An end user of a pointer argument is a load or store instruction. 542 // Returns std::nullopt if this load or store is not based on the argument. 543 // Return true if we can promote the instruction, false otherwise. 544 auto HandleEndUser = [&](auto *I, Type *Ty, 545 bool GuaranteedToExecute) -> std::optional<bool> { 546 // Don't promote volatile or atomic instructions. 547 if (!I->isSimple()) 548 return false; 549 550 Value *Ptr = I->getPointerOperand(); 551 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 552 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 553 /* AllowNonInbounds */ true); 554 if (Ptr != Arg) 555 return std::nullopt; 556 557 if (Offset.getSignificantBits() >= 64) 558 return false; 559 560 TypeSize Size = DL.getTypeStoreSize(Ty); 561 // Don't try to promote scalable types. 562 if (Size.isScalable()) 563 return false; 564 565 // If this is a recursive function and one of the types is a pointer, 566 // then promoting it might lead to recursive promotion. 567 if (IsRecursive && Ty->isPointerTy()) 568 return false; 569 570 int64_t Off = Offset.getSExtValue(); 571 auto Pair = ArgParts.try_emplace( 572 Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr}); 573 ArgPart &Part = Pair.first->second; 574 bool OffsetNotSeenBefore = Pair.second; 575 576 // We limit promotion to only promoting up to a fixed number of elements of 577 // the aggregate. 578 if (MaxElements > 0 && ArgParts.size() > MaxElements) { 579 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 580 << "more than " << MaxElements << " parts\n"); 581 return false; 582 } 583 584 // For now, we only support loading/storing one specific type at a given 585 // offset. 586 if (Part.Ty != Ty) { 587 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 588 << "accessed as both " << *Part.Ty << " and " << *Ty 589 << " at offset " << Off << "\n"); 590 return false; 591 } 592 593 // If this instruction is not guaranteed to execute, and we haven't seen a 594 // load or store at this offset before (or it had lower alignment), then we 595 // need to remember that requirement. 596 // Note that skipping instructions of previously seen offsets is only 597 // correct because we only allow a single type for a given offset, which 598 // also means that the number of accessed bytes will be the same. 599 if (!GuaranteedToExecute && 600 (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { 601 // We won't be able to prove dereferenceability for negative offsets. 602 if (Off < 0) 603 return false; 604 605 // If the offset is not aligned, an aligned base pointer won't help. 606 if (!isAligned(I->getAlign(), Off)) 607 return false; 608 609 NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue()); 610 NeededAlign = std::max(NeededAlign, I->getAlign()); 611 } 612 613 Part.Alignment = std::max(Part.Alignment, I->getAlign()); 614 return true; 615 }; 616 617 // Look for loads and stores that are guaranteed to execute on entry. 618 for (Instruction &I : Arg->getParent()->getEntryBlock()) { 619 std::optional<bool> Res{}; 620 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) 621 Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true); 622 else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) 623 Res = HandleEndUser(SI, SI->getValueOperand()->getType(), 624 /* GuaranteedToExecute */ true); 625 if (Res && !*Res) 626 return false; 627 628 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 629 break; 630 } 631 632 // Now look at all loads of the argument. Remember the load instructions 633 // for the aliasing check below. 634 SmallVector<const Use *, 16> Worklist; 635 SmallPtrSet<const Use *, 16> Visited; 636 SmallVector<LoadInst *, 16> Loads; 637 SmallPtrSet<CallBase *, 4> RecursiveCalls; 638 auto AppendUses = [&](const Value *V) { 639 for (const Use &U : V->uses()) 640 if (Visited.insert(&U).second) 641 Worklist.push_back(&U); 642 }; 643 AppendUses(Arg); 644 while (!Worklist.empty()) { 645 const Use *U = Worklist.pop_back_val(); 646 Value *V = U->getUser(); 647 648 if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { 649 if (!GEP->hasAllConstantIndices()) 650 return false; 651 AppendUses(V); 652 continue; 653 } 654 655 if (auto *LI = dyn_cast<LoadInst>(V)) { 656 if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) 657 return false; 658 Loads.push_back(LI); 659 continue; 660 } 661 662 // Stores are allowed for byval arguments 663 auto *SI = dyn_cast<StoreInst>(V); 664 if (AreStoresAllowed && SI && 665 U->getOperandNo() == StoreInst::getPointerOperandIndex()) { 666 if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), 667 /* GuaranteedToExecute */ false)) 668 return false; 669 continue; 670 // Only stores TO the argument is allowed, all the other stores are 671 // unknown users 672 } 673 674 auto *CB = dyn_cast<CallBase>(V); 675 Value *PtrArg = U->get(); 676 if (CB && CB->getCalledFunction() == CB->getFunction()) { 677 if (PtrArg != Arg) { 678 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 679 << "pointer offset is not equal to zero\n"); 680 return false; 681 } 682 683 unsigned int ArgNo = Arg->getArgNo(); 684 if (U->getOperandNo() != ArgNo) { 685 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 686 << "arg position is different in callee\n"); 687 return false; 688 } 689 690 // We limit promotion to only promoting up to a fixed number of elements 691 // of the aggregate. 692 if (MaxElements > 0 && ArgParts.size() > MaxElements) { 693 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 694 << "more than " << MaxElements << " parts\n"); 695 return false; 696 } 697 698 RecursiveCalls.insert(CB); 699 continue; 700 } 701 // Unknown user. 702 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 703 << "unknown user " << *V << "\n"); 704 return false; 705 } 706 707 if (NeededDerefBytes || NeededAlign > 1) { 708 // Try to prove a required deref / aligned requirement. 709 if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign, 710 NeededDerefBytes)) { 711 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " 712 << "not dereferenceable or aligned\n"); 713 return false; 714 } 715 } 716 717 if (ArgParts.empty()) 718 return true; // No users, this is a dead argument. 719 720 // Sort parts by offset. 721 append_range(ArgPartsVec, ArgParts); 722 sort(ArgPartsVec, llvm::less_first()); 723 724 // Make sure the parts are non-overlapping. 725 int64_t Offset = ArgPartsVec[0].first; 726 for (const auto &Pair : ArgPartsVec) { 727 if (Pair.first < Offset) 728 return false; // Overlap with previous part. 729 730 Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty); 731 } 732 733 // If store instructions are allowed, the path from the entry of the function 734 // to each load may be not free of instructions that potentially invalidate 735 // the load, and this is an admissible situation. 736 if (AreStoresAllowed) 737 return true; 738 739 // Okay, now we know that the argument is only used by load instructions, and 740 // it is safe to unconditionally perform all of them. 741 742 // If we can determine that no call to the Function modifies the memory region 743 // accessed through Arg, through alias analysis using actual arguments in the 744 // callers, we know that it is guaranteed to be safe to promote the argument. 745 if (isArgUnmodifiedByAllCalls(Arg, FAM)) 746 return true; 747 748 // Otherwise, use alias analysis to check if the pointer is guaranteed to not 749 // be modified from entry of the function to each of the load instructions. 750 for (LoadInst *Load : Loads) { 751 // Check to see if the load is invalidated from the start of the block to 752 // the load itself. 753 BasicBlock *BB = Load->getParent(); 754 755 MemoryLocation Loc = MemoryLocation::get(Load); 756 if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod)) 757 return false; // Pointer is invalidated! 758 759 // Now check every path from the entry block to the load for transparency. 760 // To do this, we perform a depth first search on the inverse CFG from the 761 // loading block. 762 for (BasicBlock *P : predecessors(BB)) { 763 for (BasicBlock *TranspBB : inverse_depth_first(P)) 764 if (AAR.canBasicBlockModify(*TranspBB, Loc)) 765 return false; 766 } 767 } 768 769 // If the path from the entry of the function to each load is free of 770 // instructions that potentially invalidate the load, we can make the 771 // transformation! 772 return true; 773 } 774 775 /// Check if callers and callee agree on how promoted arguments would be 776 /// passed. 777 static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, 778 const TargetTransformInfo &TTI) { 779 return all_of(F.uses(), [&](const Use &U) { 780 CallBase *CB = dyn_cast<CallBase>(U.getUser()); 781 if (!CB) 782 return false; 783 784 const Function *Caller = CB->getCaller(); 785 const Function *Callee = CB->getCalledFunction(); 786 return TTI.areTypesABICompatible(Caller, Callee, Types); 787 }); 788 } 789 790 /// PromoteArguments - This method checks the specified function to see if there 791 /// are any promotable arguments and if it is safe to promote the function (for 792 /// example, all callers are direct). If safe to promote some arguments, it 793 /// calls the DoPromotion method. 794 static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, 795 unsigned MaxElements, bool IsRecursive) { 796 // Don't perform argument promotion for naked functions; otherwise we can end 797 // up removing parameters that are seemingly 'not used' as they are referred 798 // to in the assembly. 799 if (F->hasFnAttribute(Attribute::Naked)) 800 return nullptr; 801 802 // Make sure that it is local to this module. 803 if (!F->hasLocalLinkage()) 804 return nullptr; 805 806 // Don't promote arguments for variadic functions. Adding, removing, or 807 // changing non-pack parameters can change the classification of pack 808 // parameters. Frontends encode that classification at the call site in the 809 // IR, while in the callee the classification is determined dynamically based 810 // on the number of registers consumed so far. 811 if (F->isVarArg()) 812 return nullptr; 813 814 // Don't transform functions that receive inallocas, as the transformation may 815 // not be safe depending on calling convention. 816 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca)) 817 return nullptr; 818 819 // First check: see if there are any pointer arguments! If not, quick exit. 820 SmallVector<Argument *, 16> PointerArgs; 821 for (Argument &I : F->args()) 822 if (I.getType()->isPointerTy()) 823 PointerArgs.push_back(&I); 824 if (PointerArgs.empty()) 825 return nullptr; 826 827 // Second check: make sure that all callers are direct callers. We can't 828 // transform functions that have indirect callers. Also see if the function 829 // is self-recursive. 830 for (Use &U : F->uses()) { 831 CallBase *CB = dyn_cast<CallBase>(U.getUser()); 832 // Must be a direct call. 833 if (CB == nullptr || !CB->isCallee(&U) || 834 CB->getFunctionType() != F->getFunctionType()) 835 return nullptr; 836 837 // Can't change signature of musttail callee 838 if (CB->isMustTailCall()) 839 return nullptr; 840 841 if (CB->getFunction() == F) 842 IsRecursive = true; 843 } 844 845 // Can't change signature of musttail caller 846 // FIXME: Support promoting whole chain of musttail functions 847 for (BasicBlock &BB : *F) 848 if (BB.getTerminatingMustTailCall()) 849 return nullptr; 850 851 const DataLayout &DL = F->getDataLayout(); 852 auto &AAR = FAM.getResult<AAManager>(*F); 853 const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F); 854 855 // Check to see which arguments are promotable. If an argument is promotable, 856 // add it to ArgsToPromote. 857 DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; 858 unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams(); 859 for (Argument *PtrArg : PointerArgs) { 860 // Replace sret attribute with noalias. This reduces register pressure by 861 // avoiding a register copy. 862 if (PtrArg->hasStructRetAttr()) { 863 unsigned ArgNo = PtrArg->getArgNo(); 864 F->removeParamAttr(ArgNo, Attribute::StructRet); 865 F->addParamAttr(ArgNo, Attribute::NoAlias); 866 for (Use &U : F->uses()) { 867 CallBase &CB = cast<CallBase>(*U.getUser()); 868 CB.removeParamAttr(ArgNo, Attribute::StructRet); 869 CB.addParamAttr(ArgNo, Attribute::NoAlias); 870 } 871 } 872 873 // If we can promote the pointer to its value. 874 SmallVector<OffsetAndArgPart, 4> ArgParts; 875 876 if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts, 877 FAM)) { 878 SmallVector<Type *, 4> Types; 879 for (const auto &Pair : ArgParts) 880 Types.push_back(Pair.second.Ty); 881 882 if (areTypesABICompatible(Types, *F, TTI)) { 883 NumArgsAfterPromote += ArgParts.size() - 1; 884 ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); 885 } 886 } 887 } 888 889 // No promotable pointer arguments. 890 if (ArgsToPromote.empty()) 891 return nullptr; 892 893 if (NumArgsAfterPromote > TTI.getMaxNumArgs()) 894 return nullptr; 895 896 return doPromotion(F, FAM, ArgsToPromote); 897 } 898 899 PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, 900 CGSCCAnalysisManager &AM, 901 LazyCallGraph &CG, 902 CGSCCUpdateResult &UR) { 903 bool Changed = false, LocalChange; 904 905 // Iterate until we stop promoting from this SCC. 906 do { 907 LocalChange = false; 908 909 FunctionAnalysisManager &FAM = 910 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 911 912 bool IsRecursive = C.size() > 1; 913 for (LazyCallGraph::Node &N : C) { 914 Function &OldF = N.getFunction(); 915 Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive); 916 if (!NewF) 917 continue; 918 LocalChange = true; 919 920 // Directly substitute the functions in the call graph. Note that this 921 // requires the old function to be completely dead and completely 922 // replaced by the new function. It does no call graph updates, it merely 923 // swaps out the particular function mapped to a particular node in the 924 // graph. 925 C.getOuterRefSCC().replaceNodeFunction(N, *NewF); 926 FAM.clear(OldF, OldF.getName()); 927 OldF.eraseFromParent(); 928 929 PreservedAnalyses FuncPA; 930 FuncPA.preserveSet<CFGAnalyses>(); 931 for (auto *U : NewF->users()) { 932 auto *UserF = cast<CallBase>(U)->getFunction(); 933 FAM.invalidate(*UserF, FuncPA); 934 } 935 } 936 937 Changed |= LocalChange; 938 } while (LocalChange); 939 940 if (!Changed) 941 return PreservedAnalyses::all(); 942 943 PreservedAnalyses PA; 944 // We've cleared out analyses for deleted functions. 945 PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); 946 // We've manually invalidated analyses for functions we've modified. 947 PA.preserveSet<AllAnalysesOn<Function>>(); 948 return PA; 949 } 950