xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp (revision a03411e84728e9b267056fd31c7d1d9d1dc1b01e)
1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass promotes "by reference" arguments to be "by value" arguments.  In
10 // practice, this means looking for internal functions that have pointer
11 // arguments.  If it can prove, through the use of alias analysis, that an
12 // argument is *only* loaded, then it can pass the value into the function
13 // instead of the address of the value.  This can cause recursive simplification
14 // of code and lead to the elimination of allocas (especially in C++ template
15 // code like the STL).
16 //
17 // This pass also handles aggregate arguments that are passed into a function,
18 // scalarizing them if the elements of the aggregate are only loaded.  Note that
19 // by default it refuses to scalarize aggregates which would require passing in
20 // more than three operands to the function, because passing thousands of
21 // operands for a large array or structure is unprofitable! This limit can be
22 // configured or disabled, however.
23 //
24 // Note that this transformation could also be done for arguments that are only
25 // stored to (returning the value instead), but does not currently.  This case
26 // would be best handled when and if LLVM begins supporting multiple return
27 // values from functions.
28 //
29 //===----------------------------------------------------------------------===//
30 
31 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
32 
33 #include "llvm/ADT/DepthFirstIterator.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/ScopeExit.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/Twine.h"
40 #include "llvm/Analysis/AssumptionCache.h"
41 #include "llvm/Analysis/BasicAliasAnalysis.h"
42 #include "llvm/Analysis/CallGraph.h"
43 #include "llvm/Analysis/Loads.h"
44 #include "llvm/Analysis/MemoryLocation.h"
45 #include "llvm/Analysis/TargetTransformInfo.h"
46 #include "llvm/Analysis/ValueTracking.h"
47 #include "llvm/IR/Argument.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/BasicBlock.h"
50 #include "llvm/IR/CFG.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/Metadata.h"
61 #include "llvm/IR/NoFolder.h"
62 #include "llvm/IR/PassManager.h"
63 #include "llvm/IR/Type.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/raw_ostream.h"
70 #include "llvm/Transforms/Utils/Local.h"
71 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
72 #include <algorithm>
73 #include <cassert>
74 #include <cstdint>
75 #include <utility>
76 #include <vector>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "argpromotion"
81 
82 STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
83 STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
84 
85 namespace {
86 
87 struct ArgPart {
88   Type *Ty;
89   Align Alignment;
90   /// A representative guaranteed-executed load or store instruction for use by
91   /// metadata transfer.
92   Instruction *MustExecInstr;
93 };
94 
95 using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
96 
97 } // end anonymous namespace
98 
99 static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
100                             Value *Ptr, Type *ResElemTy, int64_t Offset) {
101   if (Offset != 0) {
102     APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
103     Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(APOffset));
104   }
105   return Ptr;
106 }
107 
108 /// DoPromotion - This method actually performs the promotion of the specified
109 /// arguments, and returns the new function.  At this point, we know that it's
110 /// safe to do so.
111 static Function *
112 doPromotion(Function *F, FunctionAnalysisManager &FAM,
113             const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
114                 &ArgsToPromote) {
115   // Start by computing a new prototype for the function, which is the same as
116   // the old function, but has modified arguments.
117   FunctionType *FTy = F->getFunctionType();
118   std::vector<Type *> Params;
119 
120   // Attribute - Keep track of the parameter attributes for the arguments
121   // that we are *not* promoting. For the ones that we do promote, the parameter
122   // attributes are lost
123   SmallVector<AttributeSet, 8> ArgAttrVec;
124   AttributeList PAL = F->getAttributes();
125 
126   // First, determine the new argument list
127   unsigned ArgNo = 0;
128   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
129        ++I, ++ArgNo) {
130     if (!ArgsToPromote.count(&*I)) {
131       // Unchanged argument
132       Params.push_back(I->getType());
133       ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
134     } else if (I->use_empty()) {
135       // Dead argument (which are always marked as promotable)
136       ++NumArgumentsDead;
137     } else {
138       const auto &ArgParts = ArgsToPromote.find(&*I)->second;
139       for (const auto &Pair : ArgParts) {
140         Params.push_back(Pair.second.Ty);
141         ArgAttrVec.push_back(AttributeSet());
142       }
143       ++NumArgumentsPromoted;
144     }
145   }
146 
147   Type *RetTy = FTy->getReturnType();
148 
149   // Construct the new function type using the new arguments.
150   FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
151 
152   // Create the new function body and insert it into the module.
153   Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
154                                   F->getName());
155   NF->copyAttributesFrom(F);
156   NF->copyMetadata(F, 0);
157 
158   // The new function will have the !dbg metadata copied from the original
159   // function. The original function may not be deleted, and dbg metadata need
160   // to be unique, so we need to drop it.
161   F->setSubprogram(nullptr);
162 
163   LLVM_DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
164                     << "From: " << *F);
165 
166   uint64_t LargestVectorWidth = 0;
167   for (auto *I : Params)
168     if (auto *VT = dyn_cast<llvm::VectorType>(I))
169       LargestVectorWidth = std::max(
170           LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());
171 
172   // Recompute the parameter attributes list based on the new arguments for
173   // the function.
174   NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
175                                        PAL.getRetAttrs(), ArgAttrVec));
176   AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
177   ArgAttrVec.clear();
178 
179   F->getParent()->getFunctionList().insert(F->getIterator(), NF);
180   NF->takeName(F);
181 
182   // Loop over all the callers of the function, transforming the call sites to
183   // pass in the loaded pointers.
184   SmallVector<Value *, 16> Args;
185   const DataLayout &DL = F->getParent()->getDataLayout();
186   SmallVector<WeakTrackingVH, 16> DeadArgs;
187 
188   while (!F->use_empty()) {
189     CallBase &CB = cast<CallBase>(*F->user_back());
190     assert(CB.getCalledFunction() == F);
191     const AttributeList &CallPAL = CB.getAttributes();
192     IRBuilder<NoFolder> IRB(&CB);
193 
194     // Loop over the operands, inserting GEP and loads in the caller as
195     // appropriate.
196     auto *AI = CB.arg_begin();
197     ArgNo = 0;
198     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
199          ++I, ++AI, ++ArgNo) {
200       if (!ArgsToPromote.count(&*I)) {
201         Args.push_back(*AI); // Unmodified argument
202         ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
203       } else if (!I->use_empty()) {
204         Value *V = *AI;
205         const auto &ArgParts = ArgsToPromote.find(&*I)->second;
206         for (const auto &Pair : ArgParts) {
207           LoadInst *LI = IRB.CreateAlignedLoad(
208               Pair.second.Ty,
209               createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
210               Pair.second.Alignment, V->getName() + ".val");
211           if (Pair.second.MustExecInstr) {
212             LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
213             LI->copyMetadata(*Pair.second.MustExecInstr,
214                              {LLVMContext::MD_dereferenceable,
215                               LLVMContext::MD_dereferenceable_or_null,
216                               LLVMContext::MD_noundef,
217                               LLVMContext::MD_nontemporal});
218             // Only transfer poison-generating metadata if we also have
219             // !noundef.
220             // TODO: Without !noundef, we could merge this metadata across
221             // all promoted loads.
222             if (LI->hasMetadata(LLVMContext::MD_noundef))
223               LI->copyMetadata(*Pair.second.MustExecInstr,
224                                {LLVMContext::MD_range, LLVMContext::MD_nonnull,
225                                 LLVMContext::MD_align});
226           }
227           Args.push_back(LI);
228           ArgAttrVec.push_back(AttributeSet());
229         }
230       } else {
231         assert(ArgsToPromote.count(&*I) && I->use_empty());
232         DeadArgs.emplace_back(AI->get());
233       }
234     }
235 
236     // Push any varargs arguments on the list.
237     for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
238       Args.push_back(*AI);
239       ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
240     }
241 
242     SmallVector<OperandBundleDef, 1> OpBundles;
243     CB.getOperandBundlesAsDefs(OpBundles);
244 
245     CallBase *NewCS = nullptr;
246     if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
247       NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
248                                  Args, OpBundles, "", &CB);
249     } else {
250       auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
251       NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
252       NewCS = NewCall;
253     }
254     NewCS->setCallingConv(CB.getCallingConv());
255     NewCS->setAttributes(AttributeList::get(F->getContext(),
256                                             CallPAL.getFnAttrs(),
257                                             CallPAL.getRetAttrs(), ArgAttrVec));
258     NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
259     Args.clear();
260     ArgAttrVec.clear();
261 
262     AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
263                                                   LargestVectorWidth);
264 
265     if (!CB.use_empty()) {
266       CB.replaceAllUsesWith(NewCS);
267       NewCS->takeName(&CB);
268     }
269 
270     // Finally, remove the old call from the program, reducing the use-count of
271     // F.
272     CB.eraseFromParent();
273   }
274 
275   RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
276 
277   // Since we have now created the new function, splice the body of the old
278   // function right into the new function, leaving the old rotting hulk of the
279   // function empty.
280   NF->splice(NF->begin(), F);
281 
282   // We will collect all the new created allocas to promote them into registers
283   // after the following loop
284   SmallVector<AllocaInst *, 4> Allocas;
285 
286   // Loop over the argument list, transferring uses of the old arguments over to
287   // the new arguments, also transferring over the names as well.
288   Function::arg_iterator I2 = NF->arg_begin();
289   for (Argument &Arg : F->args()) {
290     if (!ArgsToPromote.count(&Arg)) {
291       // If this is an unmodified argument, move the name and users over to the
292       // new version.
293       Arg.replaceAllUsesWith(&*I2);
294       I2->takeName(&Arg);
295       ++I2;
296       continue;
297     }
298 
299     // There potentially are metadata uses for things like llvm.dbg.value.
300     // Replace them with undef, after handling the other regular uses.
301     auto RauwUndefMetadata = make_scope_exit(
302         [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
303 
304     if (Arg.use_empty())
305       continue;
306 
307     // Otherwise, if we promoted this argument, we have to create an alloca in
308     // the callee for every promotable part and store each of the new incoming
309     // arguments into the corresponding alloca, what lets the old code (the
310     // store instructions if they are allowed especially) a chance to work as
311     // before.
312     assert(Arg.getType()->isPointerTy() &&
313            "Only arguments with a pointer type are promotable");
314 
315     IRBuilder<NoFolder> IRB(&NF->begin()->front());
316 
317     // Add only the promoted elements, so parts from ArgsToPromote
318     SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
319     for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
320       int64_t Offset = Pair.first;
321       const ArgPart &Part = Pair.second;
322 
323       Argument *NewArg = I2++;
324       NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
325 
326       AllocaInst *NewAlloca = IRB.CreateAlloca(
327           Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
328       NewAlloca->setAlignment(Pair.second.Alignment);
329       IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);
330 
331       // Collect the alloca to retarget the users to
332       OffsetToAlloca.insert({Offset, NewAlloca});
333     }
334 
335     auto GetAlloca = [&](Value *Ptr) {
336       APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
337       Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
338                                                    /* AllowNonInbounds */ true);
339       assert(Ptr == &Arg && "Not constant offset from arg?");
340       return OffsetToAlloca.lookup(Offset.getSExtValue());
341     };
342 
343     // Cleanup the code from the dead instructions: GEPs and BitCasts in between
344     // the original argument and its users: loads and stores. Retarget every
345     // user to the new created alloca.
346     SmallVector<Value *, 16> Worklist;
347     SmallVector<Instruction *, 16> DeadInsts;
348     append_range(Worklist, Arg.users());
349     while (!Worklist.empty()) {
350       Value *V = Worklist.pop_back_val();
351       if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
352         DeadInsts.push_back(cast<Instruction>(V));
353         append_range(Worklist, V->users());
354         continue;
355       }
356 
357       if (auto *LI = dyn_cast<LoadInst>(V)) {
358         Value *Ptr = LI->getPointerOperand();
359         LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
360         continue;
361       }
362 
363       if (auto *SI = dyn_cast<StoreInst>(V)) {
364         assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
365         Value *Ptr = SI->getPointerOperand();
366         SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
367         continue;
368       }
369 
370       llvm_unreachable("Unexpected user");
371     }
372 
373     for (Instruction *I : DeadInsts) {
374       I->replaceAllUsesWith(PoisonValue::get(I->getType()));
375       I->eraseFromParent();
376     }
377 
378     // Collect the allocas for promotion
379     for (const auto &Pair : OffsetToAlloca) {
380       assert(isAllocaPromotable(Pair.second) &&
381              "By design, only promotable allocas should be produced.");
382       Allocas.push_back(Pair.second);
383     }
384   }
385 
386   LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
387                     << " alloca(s) are promotable by Mem2Reg\n");
388 
389   if (!Allocas.empty()) {
390     // And we are able to call the `promoteMemoryToRegister()` function.
391     // Our earlier checks have ensured that PromoteMemToReg() will
392     // succeed.
393     auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
394     auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
395     PromoteMemToReg(Allocas, DT, &AC);
396   }
397 
398   return NF;
399 }
400 
401 /// Return true if we can prove that all callees pass in a valid pointer for the
402 /// specified function argument.
403 static bool allCallersPassValidPointerForArgument(Argument *Arg,
404                                                   Align NeededAlign,
405                                                   uint64_t NeededDerefBytes) {
406   Function *Callee = Arg->getParent();
407   const DataLayout &DL = Callee->getParent()->getDataLayout();
408   APInt Bytes(64, NeededDerefBytes);
409 
410   // Check if the argument itself is marked dereferenceable and aligned.
411   if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
412     return true;
413 
414   // Look at all call sites of the function.  At this point we know we only have
415   // direct callees.
416   return all_of(Callee->users(), [&](User *U) {
417     CallBase &CB = cast<CallBase>(*U);
418     return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
419                                               NeededAlign, Bytes, DL);
420   });
421 }
422 
423 /// Determine that this argument is safe to promote, and find the argument
424 /// parts it can be promoted into.
425 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
426                          unsigned MaxElements, bool IsRecursive,
427                          SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
428   // Quick exit for unused arguments
429   if (Arg->use_empty())
430     return true;
431 
432   // We can only promote this argument if all the uses are loads at known
433   // offsets.
434   //
435   // Promoting the argument causes it to be loaded in the caller
436   // unconditionally. This is only safe if we can prove that either the load
437   // would have happened in the callee anyway (ie, there is a load in the entry
438   // block) or the pointer passed in at every call site is guaranteed to be
439   // valid.
440   // In the former case, invalid loads can happen, but would have happened
441   // anyway, in the latter case, invalid loads won't happen. This prevents us
442   // from introducing an invalid load that wouldn't have happened in the
443   // original code.
444 
445   SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
446   Align NeededAlign(1);
447   uint64_t NeededDerefBytes = 0;
448 
449   // And if this is a byval argument we also allow to have store instructions.
450   // Only handle in such way arguments with specified alignment;
451   // if it's unspecified, the actual alignment of the argument is
452   // target-specific.
453   bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
454 
455   // An end user of a pointer argument is a load or store instruction.
456   // Returns std::nullopt if this load or store is not based on the argument.
457   // Return true if we can promote the instruction, false otherwise.
458   auto HandleEndUser = [&](auto *I, Type *Ty,
459                            bool GuaranteedToExecute) -> std::optional<bool> {
460     // Don't promote volatile or atomic instructions.
461     if (!I->isSimple())
462       return false;
463 
464     Value *Ptr = I->getPointerOperand();
465     APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
466     Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
467                                                  /* AllowNonInbounds */ true);
468     if (Ptr != Arg)
469       return std::nullopt;
470 
471     if (Offset.getSignificantBits() >= 64)
472       return false;
473 
474     TypeSize Size = DL.getTypeStoreSize(Ty);
475     // Don't try to promote scalable types.
476     if (Size.isScalable())
477       return false;
478 
479     // If this is a recursive function and one of the types is a pointer,
480     // then promoting it might lead to recursive promotion.
481     if (IsRecursive && Ty->isPointerTy())
482       return false;
483 
484     int64_t Off = Offset.getSExtValue();
485     auto Pair = ArgParts.try_emplace(
486         Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
487     ArgPart &Part = Pair.first->second;
488     bool OffsetNotSeenBefore = Pair.second;
489 
490     // We limit promotion to only promoting up to a fixed number of elements of
491     // the aggregate.
492     if (MaxElements > 0 && ArgParts.size() > MaxElements) {
493       LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
494                         << "more than " << MaxElements << " parts\n");
495       return false;
496     }
497 
498     // For now, we only support loading/storing one specific type at a given
499     // offset.
500     if (Part.Ty != Ty) {
501       LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
502                         << "accessed as both " << *Part.Ty << " and " << *Ty
503                         << " at offset " << Off << "\n");
504       return false;
505     }
506 
507     // If this instruction is not guaranteed to execute, and we haven't seen a
508     // load or store at this offset before (or it had lower alignment), then we
509     // need to remember that requirement.
510     // Note that skipping instructions of previously seen offsets is only
511     // correct because we only allow a single type for a given offset, which
512     // also means that the number of accessed bytes will be the same.
513     if (!GuaranteedToExecute &&
514         (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
515       // We won't be able to prove dereferenceability for negative offsets.
516       if (Off < 0)
517         return false;
518 
519       // If the offset is not aligned, an aligned base pointer won't help.
520       if (!isAligned(I->getAlign(), Off))
521         return false;
522 
523       NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
524       NeededAlign = std::max(NeededAlign, I->getAlign());
525     }
526 
527     Part.Alignment = std::max(Part.Alignment, I->getAlign());
528     return true;
529   };
530 
531   // Look for loads and stores that are guaranteed to execute on entry.
532   for (Instruction &I : Arg->getParent()->getEntryBlock()) {
533     std::optional<bool> Res{};
534     if (LoadInst *LI = dyn_cast<LoadInst>(&I))
535       Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
536     else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
537       Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
538                           /* GuaranteedToExecute */ true);
539     if (Res && !*Res)
540       return false;
541 
542     if (!isGuaranteedToTransferExecutionToSuccessor(&I))
543       break;
544   }
545 
546   // Now look at all loads of the argument. Remember the load instructions
547   // for the aliasing check below.
548   SmallVector<const Use *, 16> Worklist;
549   SmallPtrSet<const Use *, 16> Visited;
550   SmallVector<LoadInst *, 16> Loads;
551   auto AppendUses = [&](const Value *V) {
552     for (const Use &U : V->uses())
553       if (Visited.insert(&U).second)
554         Worklist.push_back(&U);
555   };
556   AppendUses(Arg);
557   while (!Worklist.empty()) {
558     const Use *U = Worklist.pop_back_val();
559     Value *V = U->getUser();
560     if (isa<BitCastInst>(V)) {
561       AppendUses(V);
562       continue;
563     }
564 
565     if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
566       if (!GEP->hasAllConstantIndices())
567         return false;
568       AppendUses(V);
569       continue;
570     }
571 
572     if (auto *LI = dyn_cast<LoadInst>(V)) {
573       if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
574         return false;
575       Loads.push_back(LI);
576       continue;
577     }
578 
579     // Stores are allowed for byval arguments
580     auto *SI = dyn_cast<StoreInst>(V);
581     if (AreStoresAllowed && SI &&
582         U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
583       if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
584                           /* GuaranteedToExecute */ false))
585         return false;
586       continue;
587       // Only stores TO the argument is allowed, all the other stores are
588       // unknown users
589     }
590 
591     // Unknown user.
592     LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
593                       << "unknown user " << *V << "\n");
594     return false;
595   }
596 
597   if (NeededDerefBytes || NeededAlign > 1) {
598     // Try to prove a required deref / aligned requirement.
599     if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
600                                                NeededDerefBytes)) {
601       LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
602                         << "not dereferenceable or aligned\n");
603       return false;
604     }
605   }
606 
607   if (ArgParts.empty())
608     return true; // No users, this is a dead argument.
609 
610   // Sort parts by offset.
611   append_range(ArgPartsVec, ArgParts);
612   sort(ArgPartsVec, llvm::less_first());
613 
614   // Make sure the parts are non-overlapping.
615   int64_t Offset = ArgPartsVec[0].first;
616   for (const auto &Pair : ArgPartsVec) {
617     if (Pair.first < Offset)
618       return false; // Overlap with previous part.
619 
620     Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
621   }
622 
623   // If store instructions are allowed, the path from the entry of the function
624   // to each load may be not free of instructions that potentially invalidate
625   // the load, and this is an admissible situation.
626   if (AreStoresAllowed)
627     return true;
628 
629   // Okay, now we know that the argument is only used by load instructions, and
630   // it is safe to unconditionally perform all of them. Use alias analysis to
631   // check to see if the pointer is guaranteed to not be modified from entry of
632   // the function to each of the load instructions.
633 
634   // Because there could be several/many load instructions, remember which
635   // blocks we know to be transparent to the load.
636   df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
637 
638   for (LoadInst *Load : Loads) {
639     // Check to see if the load is invalidated from the start of the block to
640     // the load itself.
641     BasicBlock *BB = Load->getParent();
642 
643     MemoryLocation Loc = MemoryLocation::get(Load);
644     if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
645       return false; // Pointer is invalidated!
646 
647     // Now check every path from the entry block to the load for transparency.
648     // To do this, we perform a depth first search on the inverse CFG from the
649     // loading block.
650     for (BasicBlock *P : predecessors(BB)) {
651       for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
652         if (AAR.canBasicBlockModify(*TranspBB, Loc))
653           return false;
654     }
655   }
656 
657   // If the path from the entry of the function to each load is free of
658   // instructions that potentially invalidate the load, we can make the
659   // transformation!
660   return true;
661 }
662 
663 /// Check if callers and callee agree on how promoted arguments would be
664 /// passed.
665 static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
666                                   const TargetTransformInfo &TTI) {
667   return all_of(F.uses(), [&](const Use &U) {
668     CallBase *CB = dyn_cast<CallBase>(U.getUser());
669     if (!CB)
670       return false;
671 
672     const Function *Caller = CB->getCaller();
673     const Function *Callee = CB->getCalledFunction();
674     return TTI.areTypesABICompatible(Caller, Callee, Types);
675   });
676 }
677 
678 /// PromoteArguments - This method checks the specified function to see if there
679 /// are any promotable arguments and if it is safe to promote the function (for
680 /// example, all callers are direct).  If safe to promote some arguments, it
681 /// calls the DoPromotion method.
682 static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
683                                   unsigned MaxElements, bool IsRecursive) {
684   // Don't perform argument promotion for naked functions; otherwise we can end
685   // up removing parameters that are seemingly 'not used' as they are referred
686   // to in the assembly.
687   if (F->hasFnAttribute(Attribute::Naked))
688     return nullptr;
689 
690   // Make sure that it is local to this module.
691   if (!F->hasLocalLinkage())
692     return nullptr;
693 
694   // Don't promote arguments for variadic functions. Adding, removing, or
695   // changing non-pack parameters can change the classification of pack
696   // parameters. Frontends encode that classification at the call site in the
697   // IR, while in the callee the classification is determined dynamically based
698   // on the number of registers consumed so far.
699   if (F->isVarArg())
700     return nullptr;
701 
702   // Don't transform functions that receive inallocas, as the transformation may
703   // not be safe depending on calling convention.
704   if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
705     return nullptr;
706 
707   // First check: see if there are any pointer arguments!  If not, quick exit.
708   SmallVector<Argument *, 16> PointerArgs;
709   for (Argument &I : F->args())
710     if (I.getType()->isPointerTy())
711       PointerArgs.push_back(&I);
712   if (PointerArgs.empty())
713     return nullptr;
714 
715   // Second check: make sure that all callers are direct callers.  We can't
716   // transform functions that have indirect callers.  Also see if the function
717   // is self-recursive.
718   for (Use &U : F->uses()) {
719     CallBase *CB = dyn_cast<CallBase>(U.getUser());
720     // Must be a direct call.
721     if (CB == nullptr || !CB->isCallee(&U) ||
722         CB->getFunctionType() != F->getFunctionType())
723       return nullptr;
724 
725     // Can't change signature of musttail callee
726     if (CB->isMustTailCall())
727       return nullptr;
728 
729     if (CB->getFunction() == F)
730       IsRecursive = true;
731   }
732 
733   // Can't change signature of musttail caller
734   // FIXME: Support promoting whole chain of musttail functions
735   for (BasicBlock &BB : *F)
736     if (BB.getTerminatingMustTailCall())
737       return nullptr;
738 
739   const DataLayout &DL = F->getParent()->getDataLayout();
740   auto &AAR = FAM.getResult<AAManager>(*F);
741   const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);
742 
743   // Check to see which arguments are promotable.  If an argument is promotable,
744   // add it to ArgsToPromote.
745   DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
746   unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
747   for (Argument *PtrArg : PointerArgs) {
748     // Replace sret attribute with noalias. This reduces register pressure by
749     // avoiding a register copy.
750     if (PtrArg->hasStructRetAttr()) {
751       unsigned ArgNo = PtrArg->getArgNo();
752       F->removeParamAttr(ArgNo, Attribute::StructRet);
753       F->addParamAttr(ArgNo, Attribute::NoAlias);
754       for (Use &U : F->uses()) {
755         CallBase &CB = cast<CallBase>(*U.getUser());
756         CB.removeParamAttr(ArgNo, Attribute::StructRet);
757         CB.addParamAttr(ArgNo, Attribute::NoAlias);
758       }
759     }
760 
761     // If we can promote the pointer to its value.
762     SmallVector<OffsetAndArgPart, 4> ArgParts;
763 
764     if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
765       SmallVector<Type *, 4> Types;
766       for (const auto &Pair : ArgParts)
767         Types.push_back(Pair.second.Ty);
768 
769       if (areTypesABICompatible(Types, *F, TTI)) {
770         NumArgsAfterPromote += ArgParts.size() - 1;
771         ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
772       }
773     }
774   }
775 
776   // No promotable pointer arguments.
777   if (ArgsToPromote.empty())
778     return nullptr;
779 
780   if (NumArgsAfterPromote > TTI.getMaxNumArgs())
781     return nullptr;
782 
783   return doPromotion(F, FAM, ArgsToPromote);
784 }
785 
786 PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
787                                              CGSCCAnalysisManager &AM,
788                                              LazyCallGraph &CG,
789                                              CGSCCUpdateResult &UR) {
790   bool Changed = false, LocalChange;
791 
792   // Iterate until we stop promoting from this SCC.
793   do {
794     LocalChange = false;
795 
796     FunctionAnalysisManager &FAM =
797         AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
798 
799     bool IsRecursive = C.size() > 1;
800     for (LazyCallGraph::Node &N : C) {
801       Function &OldF = N.getFunction();
802       Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
803       if (!NewF)
804         continue;
805       LocalChange = true;
806 
807       // Directly substitute the functions in the call graph. Note that this
808       // requires the old function to be completely dead and completely
809       // replaced by the new function. It does no call graph updates, it merely
810       // swaps out the particular function mapped to a particular node in the
811       // graph.
812       C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
813       FAM.clear(OldF, OldF.getName());
814       OldF.eraseFromParent();
815 
816       PreservedAnalyses FuncPA;
817       FuncPA.preserveSet<CFGAnalyses>();
818       for (auto *U : NewF->users()) {
819         auto *UserF = cast<CallBase>(U)->getFunction();
820         FAM.invalidate(*UserF, FuncPA);
821       }
822     }
823 
824     Changed |= LocalChange;
825   } while (LocalChange);
826 
827   if (!Changed)
828     return PreservedAnalyses::all();
829 
830   PreservedAnalyses PA;
831   // We've cleared out analyses for deleted functions.
832   PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
833   // We've manually invalidated analyses for functions we've modified.
834   PA.preserveSet<AllAnalysesOn<Function>>();
835   return PA;
836 }
837