xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/ExpandVariadics.cpp (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 //===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is an optimization pass for variadic functions. If called from codegen,
10 // it can serve as the implementation of variadic functions for a given target.
11 //
12 // The strategy is to turn the ... part of a variadic function into a va_list
13 // and fix up the call sites. The majority of the pass is target independent.
14 // The exceptions are the va_list type itself and the rules for where to store
15 // variables in memory such that va_arg can iterate over them given a va_list.
16 //
17 // The majority of the plumbing is splitting the variadic function into a
18 // single basic block that packs the variadic arguments into a va_list and
19 // a second function that does the work of the original. That packing is
20 // exactly what is done by va_start. Further, the transform from ... to va_list
// replaces va_start with an operation to copy a va_list from the new argument,
22 // which is exactly a va_copy. This is useful for reducing target-dependence.
23 //
24 // A va_list instance is a forward iterator, where the primary operation va_arg
25 // is dereference-then-increment. This interface forces significant convergent
26 // evolution between target specific implementations. The variation in runtime
27 // data layout is limited to that representable by the iterator, parameterised
28 // by the type passed to the va_arg instruction.
29 //
30 // Therefore the majority of the target specific subtlety is packing arguments
31 // into a stack allocated buffer such that a va_list can be initialised with it
32 // and the va_arg expansion for the target will find the arguments at runtime.
33 //
34 // The aggregate effect is to unblock other transforms, most critically the
35 // general purpose inliner. Known calls to variadic functions become zero cost.
36 //
37 // Consistency with clang is primarily tested by emitting va_arg using clang
38 // then expanding the variadic functions using this pass, followed by trying
39 // to constant fold the functions to no-ops.
40 //
41 // Target specific behaviour is tested in IR - mainly checking that values are
42 // put into positions in call frames that make sense for that particular target.
43 //
44 // There is one "clever" invariant in use. va_start intrinsics that are not
// within a variadic function are an error in the IR verifier. When this
46 // transform moves blocks from a variadic function into a fixed arity one, it
47 // moves va_start intrinsics along with everything else. That means that the
48 // va_start intrinsics that need to be rewritten to use the trailing argument
49 // are exactly those that are in non-variadic functions so no further state
50 // is needed to distinguish those that need to be rewritten.
51 //
52 //===----------------------------------------------------------------------===//
53 
54 #include "llvm/Transforms/IPO/ExpandVariadics.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/IR/Constants.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/IntrinsicInst.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
63 #include "llvm/Support/CommandLine.h"
64 #include "llvm/TargetParser/Triple.h"
65 #include "llvm/Transforms/Utils/ModuleUtils.h"
66 
67 #define DEBUG_TYPE "expand-variadics"
68 
69 using namespace llvm;
70 
71 namespace {
72 
73 cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
74     DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
75     cl::init(ExpandVariadicsMode::Unspecified),
76     cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
77                           "Use the implementation defaults"),
78                clEnumValN(ExpandVariadicsMode::Disable, "disable",
79                           "Disable the pass entirely"),
80                clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
81                           "Optimise without changing ABI"),
82                clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
83                           "Change variadic calling convention")));
84 
85 bool commandLineOverride() {
86   return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
87 }
88 
// Instances of this class encapsulate the target-dependent behaviour as a
90 // function of triple. Implementing a new ABI is adding a case to the switch
91 // in create(llvm::Triple) at the end of this file.
92 // This class may end up instantiated in TargetMachine instances, keeping it
93 // here for now until enough targets are implemented for the API to evolve.
94 class VariadicABIInfo {
95 protected:
96   VariadicABIInfo() = default;
97 
98 public:
99   static std::unique_ptr<VariadicABIInfo> create(const Triple &T);
100 
101   // Allow overriding whether the pass runs on a per-target basis
102   virtual bool enableForTarget() = 0;
103 
104   // Whether a valist instance is passed by value or by address
105   // I.e. does it need to be alloca'ed and stored into, or can
106   // it be passed directly in a SSA register
107   virtual bool vaListPassedInSSARegister() = 0;
108 
109   // The type of a va_list iterator object
110   virtual Type *vaListType(LLVMContext &Ctx) = 0;
111 
112   // The type of a va_list as a function argument as lowered by C
113   virtual Type *vaListParameterType(Module &M) = 0;
114 
115   // Initialize an allocated va_list object to point to an already
116   // initialized contiguous memory region.
117   // Return the value to pass as the va_list argument
118   virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
119                                   IRBuilder<> &Builder, AllocaInst *VaList,
120                                   Value *Buffer) = 0;
121 
122   struct VAArgSlotInfo {
123     Align DataAlign; // With respect to the call frame
124     bool Indirect;   // Passed via a pointer
125   };
126   virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
127 
128   // Targets implemented so far all have the same trivial lowering for these
129   bool vaEndIsNop() { return true; }
130   bool vaCopyIsMemcpy() { return true; }
131 
132   virtual ~VariadicABIInfo() = default;
133 };
134 
135 // Module implements getFunction() which returns nullptr on missing declaration
136 // and getOrInsertFunction which creates one when absent. Intrinsics.h only
137 // implements getDeclaration which creates one when missing. Checking whether
138 // an intrinsic exists thus inserts it in the module and it then needs to be
139 // deleted again to clean up.
140 // The right name for the two functions on intrinsics would match Module::,
141 // but doing that in a single change would introduce nullptr dereferences
142 // where currently there are none. The minimal collateral damage approach
143 // would split the change over a release to help downstream branches. As it
144 // is unclear what approach will be preferred, implementing the trivial
145 // function here in the meantime to decouple from that discussion.
146 Function *getPreexistingDeclaration(Module *M, Intrinsic::ID Id,
147                                     ArrayRef<Type *> Tys = {}) {
148   auto *FT = Intrinsic::getType(M->getContext(), Id, Tys);
149   return M->getFunction(Tys.empty() ? Intrinsic::getName(Id)
150                                     : Intrinsic::getName(Id, Tys, M, FT));
151 }
152 
153 class ExpandVariadics : public ModulePass {
154 
155   // The pass construction sets the default to optimize when called from middle
156   // end and lowering when called from the backend. The command line variable
157   // overrides that. This is useful for testing and debugging. It also allows
158   // building an applications with variadic functions wholly removed if one
159   // has sufficient control over the dependencies, e.g. a statically linked
160   // clang that has no variadic function calls remaining in the binary.
161 
162 public:
163   static char ID;
164   const ExpandVariadicsMode Mode;
165   std::unique_ptr<VariadicABIInfo> ABI;
166 
167   ExpandVariadics(ExpandVariadicsMode Mode)
168       : ModulePass(ID),
169         Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}
170 
171   StringRef getPassName() const override { return "Expand variadic functions"; }
172 
173   bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
174 
175   bool runOnModule(Module &M) override;
176 
177   bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
178 
179   Function *replaceAllUsesWithNewDeclaration(Module &M,
180                                              Function *OriginalFunction);
181 
182   Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
183                                         Function *OriginalFunction);
184 
185   Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
186                                   Function *VariadicWrapper,
187                                   Function *FixedArityReplacement);
188 
189   bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
190                   Function *NF);
191 
192   // The intrinsic functions va_copy and va_end are removed unconditionally.
193   // They correspond to a memcpy and a no-op on all implemented targets.
194   // The va_start intrinsic is removed from basic blocks that were not created
195   // by this pass, some may remain if needed to maintain the external ABI.
196 
197   template <Intrinsic::ID ID, typename InstructionType>
198   bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
199                             PointerType *IntrinsicArgType) {
200     bool Changed = false;
201     const DataLayout &DL = M.getDataLayout();
202     if (Function *Intrinsic =
203             getPreexistingDeclaration(&M, ID, {IntrinsicArgType})) {
204       for (User *U : make_early_inc_range(Intrinsic->users()))
205         if (auto *I = dyn_cast<InstructionType>(U))
206           Changed |= expandVAIntrinsicCall(Builder, DL, I);
207 
208       if (Intrinsic->use_empty())
209         Intrinsic->eraseFromParent();
210     }
211     return Changed;
212   }
213 
214   bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
215                                            unsigned Addrspace) {
216     auto &Ctx = M.getContext();
217     PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
218     bool Changed = false;
219 
220     // expand vastart before vacopy as vastart may introduce a vacopy
221     Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
222         M, Builder, IntrinsicArgType);
223     Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
224         M, Builder, IntrinsicArgType);
225     Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
226         M, Builder, IntrinsicArgType);
227     return Changed;
228   }
229 
230   bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
231                              VAStartInst *Inst);
232 
233   bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
234                              VAEndInst *Inst);
235 
236   bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
237                              VACopyInst *Inst);
238 
239   FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
240     // The type of "FTy" with the ... removed and a va_list appended
241     SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
242     ArgTypes.push_back(ABI->vaListParameterType(M));
243     return FunctionType::get(FTy->getReturnType(), ArgTypes,
244                              /*IsVarArgs=*/false);
245   }
246 
247   static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
248                                    AllocaInst *Alloced) {
249     std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
250     uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
251     return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
252   }
253 
254   bool expansionApplicableToFunction(Module &M, Function *F) {
255     if (F->isIntrinsic() || !F->isVarArg() ||
256         F->hasFnAttribute(Attribute::Naked))
257       return false;
258 
259     if (F->getCallingConv() != CallingConv::C)
260       return false;
261 
262     if (rewriteABI())
263       return true;
264 
265     if (!F->hasExactDefinition())
266       return false;
267 
268     return true;
269   }
270 
271   bool expansionApplicableToFunctionCall(CallBase *CB) {
272     if (CallInst *CI = dyn_cast<CallInst>(CB)) {
273       if (CI->isMustTailCall()) {
274         // Cannot expand musttail calls
275         return false;
276       }
277 
278       if (CI->getCallingConv() != CallingConv::C)
279         return false;
280 
281       return true;
282     }
283 
284     if (isa<InvokeInst>(CB)) {
285       // Invoke not implemented in initial implementation of pass
286       return false;
287     }
288 
289     // Other unimplemented derivative of CallBase
290     return false;
291   }
292 
293   class ExpandedCallFrame {
294     // Helper for constructing an alloca instance containing the arguments bound
295     // to the variadic ... parameter, rearranged to allow indexing through a
296     // va_list iterator
297     enum { N = 4 };
298     SmallVector<Type *, N> FieldTypes;
299     enum Tag { Store, Memcpy, Padding };
300     SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
301 
302     template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
303       FieldTypes.push_back(FieldType);
304       Source.push_back({V, Bytes, tag});
305     }
306 
307   public:
308     void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
309 
310     void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
311       append<Memcpy>(T, V, Bytes);
312     }
313 
314     void padding(LLVMContext &Ctx, uint64_t By) {
315       append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
316     }
317 
318     size_t size() const { return FieldTypes.size(); }
319     bool empty() const { return FieldTypes.empty(); }
320 
321     StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
322       const bool IsPacked = true;
323       return StructType::create(Ctx, FieldTypes,
324                                 (Twine(Name) + ".vararg").str(), IsPacked);
325     }
326 
327     void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
328                                 AllocaInst *Alloced) {
329 
330       StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
331 
332       for (size_t I = 0; I < size(); I++) {
333 
334         auto [V, bytes, tag] = Source[I];
335 
336         if (tag == Padding) {
337           assert(V == nullptr);
338           continue;
339         }
340 
341         auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
342 
343         assert(V != nullptr);
344 
345         if (tag == Store)
346           Builder.CreateStore(V, Dst);
347 
348         if (tag == Memcpy)
349           Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
350       }
351     }
352   };
353 };
354 
355 bool ExpandVariadics::runOnModule(Module &M) {
356   bool Changed = false;
357   if (Mode == ExpandVariadicsMode::Disable)
358     return Changed;
359 
360   Triple TT(M.getTargetTriple());
361   ABI = VariadicABIInfo::create(TT);
362   if (!ABI)
363     return Changed;
364 
365   if (!ABI->enableForTarget())
366     return Changed;
367 
368   auto &Ctx = M.getContext();
369   const DataLayout &DL = M.getDataLayout();
370   IRBuilder<> Builder(Ctx);
371 
372   // Lowering needs to run on all functions exactly once.
373   // Optimize could run on functions containing va_start exactly once.
374   for (Function &F : make_early_inc_range(M))
375     Changed |= runOnFunction(M, Builder, &F);
376 
377   // After runOnFunction, all known calls to known variadic functions have been
378   // replaced. va_start intrinsics are presently (and invalidly!) only present
379   // in functions that used to be variadic and have now been replaced to take a
380   // va_list instead. If lowering as opposed to optimising, calls to unknown
381   // variadic functions have also been replaced.
382 
383   {
384     // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
385     unsigned Addrspace = 0;
386     Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
387 
388     Addrspace = DL.getAllocaAddrSpace();
389     if (Addrspace != 0)
390       Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
391   }
392 
393   if (Mode != ExpandVariadicsMode::Lowering)
394     return Changed;
395 
396   for (Function &F : make_early_inc_range(M)) {
397     if (F.isDeclaration())
398       continue;
399 
400     // Now need to track down indirect calls. Can't find those
401     // by walking uses of variadic functions, need to crawl the instruction
402     // stream. Fortunately this is only necessary for the ABI rewrite case.
403     for (BasicBlock &BB : F) {
404       for (Instruction &I : make_early_inc_range(BB)) {
405         if (CallBase *CB = dyn_cast<CallBase>(&I)) {
406           if (CB->isIndirectCall()) {
407             FunctionType *FTy = CB->getFunctionType();
408             if (FTy->isVarArg())
409               Changed |= expandCall(M, Builder, CB, FTy, 0);
410           }
411         }
412       }
413     }
414   }
415 
416   return Changed;
417 }
418 
419 bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
420                                     Function *OriginalFunction) {
421   bool Changed = false;
422 
423   if (!expansionApplicableToFunction(M, OriginalFunction))
424     return Changed;
425 
426   [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
427       OriginalFunction->isDeclaration();
428   assert(rewriteABI() || !OriginalFunctionIsDeclaration);
429 
430   // Declare a new function and redirect every use to that new function
431   Function *VariadicWrapper =
432       replaceAllUsesWithNewDeclaration(M, OriginalFunction);
433   assert(VariadicWrapper->isDeclaration());
434   assert(OriginalFunction->use_empty());
435 
436   // Create a new function taking va_list containing the implementation of the
437   // original
438   Function *FixedArityReplacement =
439       deriveFixedArityReplacement(M, Builder, OriginalFunction);
440   assert(OriginalFunction->isDeclaration());
441   assert(FixedArityReplacement->isDeclaration() ==
442          OriginalFunctionIsDeclaration);
443   assert(VariadicWrapper->isDeclaration());
444 
445   // Create a single block forwarding wrapper that turns a ... into a va_list
446   [[maybe_unused]] Function *VariadicWrapperDefine =
447       defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
448   assert(VariadicWrapperDefine == VariadicWrapper);
449   assert(!VariadicWrapper->isDeclaration());
450 
451   // We now have:
452   // 1. the original function, now as a declaration with no uses
453   // 2. a variadic function that unconditionally calls a fixed arity replacement
454   // 3. a fixed arity function equivalent to the original function
455 
456   // Replace known calls to the variadic with calls to the va_list equivalent
457   for (User *U : make_early_inc_range(VariadicWrapper->users())) {
458     if (CallBase *CB = dyn_cast<CallBase>(U)) {
459       Value *CalledOperand = CB->getCalledOperand();
460       if (VariadicWrapper == CalledOperand)
461         Changed |=
462             expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
463                        FixedArityReplacement);
464     }
465   }
466 
467   // The original function will be erased.
468   // One of the two new functions will become a replacement for the original.
469   // When preserving the ABI, the other is an internal implementation detail.
470   // When rewriting the ABI, RAUW then the variadic one.
471   Function *const ExternallyAccessible =
472       rewriteABI() ? FixedArityReplacement : VariadicWrapper;
473   Function *const InternalOnly =
474       rewriteABI() ? VariadicWrapper : FixedArityReplacement;
475 
476   // The external function is the replacement for the original
477   ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
478   ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
479   ExternallyAccessible->setComdat(OriginalFunction->getComdat());
480   ExternallyAccessible->takeName(OriginalFunction);
481 
482   // Annotate the internal one as internal
483   InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
484   InternalOnly->setLinkage(GlobalValue::InternalLinkage);
485 
486   // The original is unused and obsolete
487   OriginalFunction->eraseFromParent();
488 
489   InternalOnly->removeDeadConstantUsers();
490 
491   if (rewriteABI()) {
492     // All known calls to the function have been removed by expandCall
493     // Resolve everything else by replaceAllUsesWith
494     VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
495     VariadicWrapper->eraseFromParent();
496   }
497 
498   return Changed;
499 }
500 
501 Function *
502 ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
503                                                   Function *OriginalFunction) {
504   auto &Ctx = M.getContext();
505   Function &F = *OriginalFunction;
506   FunctionType *FTy = F.getFunctionType();
507   Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
508 
509   NF->setName(F.getName() + ".varargs");
510   NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
511 
512   F.getParent()->getFunctionList().insert(F.getIterator(), NF);
513 
514   AttrBuilder ParamAttrs(Ctx);
515   AttributeList Attrs = NF->getAttributes();
516   Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
517   NF->setAttributes(Attrs);
518 
519   OriginalFunction->replaceAllUsesWith(NF);
520   return NF;
521 }
522 
523 Function *
524 ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
525                                              Function *OriginalFunction) {
526   Function &F = *OriginalFunction;
527   // The purpose here is split the variadic function F into two functions
528   // One is a variadic function that bundles the passed argument into a va_list
529   // and passes it to the second function. The second function does whatever
530   // the original F does, except that it takes a va_list instead of the ...
531 
532   assert(expansionApplicableToFunction(M, &F));
533 
534   auto &Ctx = M.getContext();
535 
536   // Returned value isDeclaration() is equal to F.isDeclaration()
537   // but that property is not invariant throughout this function
538   const bool FunctionIsDefinition = !F.isDeclaration();
539 
540   FunctionType *FTy = F.getFunctionType();
541   SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
542   ArgTypes.push_back(ABI->vaListParameterType(M));
543 
544   FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
545   Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
546 
547   // Note - same attribute handling as DeadArgumentElimination
548   NF->copyAttributesFrom(&F);
549   NF->setComdat(F.getComdat());
550   F.getParent()->getFunctionList().insert(F.getIterator(), NF);
551   NF->setName(F.getName() + ".valist");
552   NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
553 
554   AttrBuilder ParamAttrs(Ctx);
555 
556   AttributeList Attrs = NF->getAttributes();
557   Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
558   NF->setAttributes(Attrs);
559 
560   // Splice the implementation into the new function with minimal changes
561   if (FunctionIsDefinition) {
562     NF->splice(NF->begin(), &F);
563 
564     auto NewArg = NF->arg_begin();
565     for (Argument &Arg : F.args()) {
566       Arg.replaceAllUsesWith(NewArg);
567       NewArg->setName(Arg.getName()); // takeName without killing the old one
568       ++NewArg;
569     }
570     NewArg->setName("varargs");
571   }
572 
573   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
574   F.getAllMetadata(MDs);
575   for (auto [KindID, Node] : MDs)
576     NF->addMetadata(KindID, *Node);
577   F.clearMetadata();
578 
579   return NF;
580 }
581 
582 Function *
583 ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
584                                        Function *VariadicWrapper,
585                                        Function *FixedArityReplacement) {
586   auto &Ctx = Builder.getContext();
587   const DataLayout &DL = M.getDataLayout();
588   assert(VariadicWrapper->isDeclaration());
589   Function &F = *VariadicWrapper;
590 
591   assert(F.isDeclaration());
592   Type *VaListTy = ABI->vaListType(Ctx);
593 
594   auto *BB = BasicBlock::Create(Ctx, "entry", &F);
595   Builder.SetInsertPoint(BB);
596 
597   AllocaInst *VaListInstance =
598       Builder.CreateAlloca(VaListTy, nullptr, "va_start");
599 
600   Builder.CreateLifetimeStart(VaListInstance,
601                               sizeOfAlloca(Ctx, DL, VaListInstance));
602 
603   Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
604                           {VaListInstance});
605 
606   SmallVector<Value *> Args;
607   for (Argument &A : F.args())
608     Args.push_back(&A);
609 
610   Type *ParameterType = ABI->vaListParameterType(M);
611   if (ABI->vaListPassedInSSARegister())
612     Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
613   else
614     Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));
615 
616   CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
617 
618   Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
619                           {VaListInstance});
620   Builder.CreateLifetimeEnd(VaListInstance,
621                             sizeOfAlloca(Ctx, DL, VaListInstance));
622 
623   if (Result->getType()->isVoidTy())
624     Builder.CreateRetVoid();
625   else
626     Builder.CreateRet(Result);
627 
628   return VariadicWrapper;
629 }
630 
631 bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
632                                  FunctionType *VarargFunctionType,
633                                  Function *NF) {
634   bool Changed = false;
635   const DataLayout &DL = M.getDataLayout();
636 
637   if (!expansionApplicableToFunctionCall(CB)) {
638     if (rewriteABI())
639       report_fatal_error("Cannot lower callbase instruction");
640     return Changed;
641   }
642 
643   // This is tricky. The call instruction's function type might not match
644   // the type of the caller. When optimising, can leave it unchanged.
645   // Webassembly detects that inconsistency and repairs it.
646   FunctionType *FuncType = CB->getFunctionType();
647   if (FuncType != VarargFunctionType) {
648     if (!rewriteABI())
649       return Changed;
650     FuncType = VarargFunctionType;
651   }
652 
653   auto &Ctx = CB->getContext();
654 
655   Align MaxFieldAlign(1);
656 
657   // The strategy is to allocate a call frame containing the variadic
658   // arguments laid out such that a target specific va_list can be initialized
659   // with it, such that target specific va_arg instructions will correctly
660   // iterate over it. This means getting the alignment right and sometimes
661   // embedding a pointer to the value instead of embedding the value itself.
662 
663   Function *CBF = CB->getParent()->getParent();
664 
665   ExpandedCallFrame Frame;
666 
667   uint64_t CurrentOffset = 0;
668 
669   for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
670     Value *ArgVal = CB->getArgOperand(I);
671     const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
672     const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);
673 
674     // The type of the value being passed, decoded from byval/byref metadata if
675     // required
676     Type *const UnderlyingType = IsByVal   ? CB->getParamByValType(I)
677                                  : IsByRef ? CB->getParamByRefType(I)
678                                            : ArgVal->getType();
679     const uint64_t UnderlyingSize =
680         DL.getTypeAllocSize(UnderlyingType).getFixedValue();
681 
682     // The type to be written into the call frame
683     Type *FrameFieldType = UnderlyingType;
684 
685     // The value to copy from when initialising the frame alloca
686     Value *SourceValue = ArgVal;
687 
688     VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);
689 
690     if (SlotInfo.Indirect) {
691       // The va_arg lowering loads through a pointer. Set up an alloca to aim
692       // that pointer at.
693       Builder.SetInsertPointPastAllocas(CBF);
694       Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
695       Value *CallerCopy =
696           Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
697 
698       Builder.SetInsertPoint(CB);
699       if (IsByVal)
700         Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
701       else
702         Builder.CreateStore(ArgVal, CallerCopy);
703 
704       // Indirection now handled, pass the alloca ptr by value
705       FrameFieldType = DL.getAllocaPtrType(Ctx);
706       SourceValue = CallerCopy;
707     }
708 
709     // Alignment of the value within the frame
710     // This probably needs to be controllable as a function of type
711     Align DataAlign = SlotInfo.DataAlign;
712 
713     MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
714 
715     uint64_t DataAlignV = DataAlign.value();
716     if (uint64_t Rem = CurrentOffset % DataAlignV) {
717       // Inject explicit padding to deal with alignment requirements
718       uint64_t Padding = DataAlignV - Rem;
719       Frame.padding(Ctx, Padding);
720       CurrentOffset += Padding;
721     }
722 
723     if (SlotInfo.Indirect) {
724       Frame.store(Ctx, FrameFieldType, SourceValue);
725     } else {
726       if (IsByVal)
727         Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
728       else
729         Frame.store(Ctx, FrameFieldType, SourceValue);
730     }
731 
732     CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
733   }
734 
735   if (Frame.empty()) {
736     // Not passing any arguments, hopefully va_arg won't try to read any
737     // Creating a single byte frame containing nothing to point the va_list
738     // instance as that is less special-casey in the compiler and probably
739     // easier to interpret in a debugger.
740     Frame.padding(Ctx, 1);
741   }
742 
743   StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
744 
745   // The struct instance needs to be at least MaxFieldAlign for the alignment of
746   // the fields to be correct at runtime. Use the native stack alignment instead
747   // if that's greater as that tends to give better codegen.
748   // This is an awkward way to guess whether there is a known stack alignment
749   // without hitting an assert in DL.getStackAlignment, 1024 is an arbitrary
750   // number likely to be greater than the natural stack alignment.
751   // TODO: DL.getStackAlignment could return a MaybeAlign instead of assert
752   Align AllocaAlign = MaxFieldAlign;
753   if (DL.exceedsNaturalStackAlignment(Align(1024)))
754     AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
755 
756   // Put the alloca to hold the variadic args in the entry basic block.
757   Builder.SetInsertPointPastAllocas(CBF);
758 
759   // SetCurrentDebugLocation when the builder SetInsertPoint method does not
760   Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
761 
762   // The awkward construction here is to set the alignment on the instance
763   AllocaInst *Alloced = Builder.Insert(
764       new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
765       "vararg_buffer");
766   Changed = true;
767   assert(Alloced->getAllocatedType() == VarargsTy);
768 
769   // Initialize the fields in the struct
770   Builder.SetInsertPoint(CB);
771   Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
772   Frame.initializeStructAlloca(DL, Builder, Alloced);
773 
774   const unsigned NumArgs = FuncType->getNumParams();
775   SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);
776 
777   // Initialize a va_list pointing to that struct and pass it as the last
778   // argument
779   AllocaInst *VaList = nullptr;
780   {
781     if (!ABI->vaListPassedInSSARegister()) {
782       Type *VaListTy = ABI->vaListType(Ctx);
783       Builder.SetInsertPointPastAllocas(CBF);
784       Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
785       VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
786       Builder.SetInsertPoint(CB);
787       Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
788     }
789     Builder.SetInsertPoint(CB);
790     Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
791   }
792 
793   // Attributes excluding any on the vararg arguments
794   AttributeList PAL = CB->getAttributes();
795   if (!PAL.isEmpty()) {
796     SmallVector<AttributeSet, 8> ArgAttrs;
797     for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
798       ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
799     PAL =
800         AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
801   }
802 
803   SmallVector<OperandBundleDef, 1> OpBundles;
804   CB->getOperandBundlesAsDefs(OpBundles);
805 
806   CallBase *NewCB = nullptr;
807 
808   if (CallInst *CI = dyn_cast<CallInst>(CB)) {
809     Value *Dst = NF ? NF : CI->getCalledOperand();
810     FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
811 
812     NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
813 
814     CallInst::TailCallKind TCK = CI->getTailCallKind();
815     assert(TCK != CallInst::TCK_MustTail);
816 
817     // Can't tail call a function that is being passed a pointer to an alloca
818     if (TCK == CallInst::TCK_Tail)
819       TCK = CallInst::TCK_None;
820     CI->setTailCallKind(TCK);
821 
822   } else {
823     llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
824   }
825 
826   if (VaList)
827     Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
828 
829   Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
830 
831   NewCB->setAttributes(PAL);
832   NewCB->takeName(CB);
833   NewCB->setCallingConv(CB->getCallingConv());
834   NewCB->setDebugLoc(DebugLoc());
835 
836   // DeadArgElim and ArgPromotion copy exactly this metadata
837   NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
838 
839   CB->replaceAllUsesWith(NewCB);
840   CB->eraseFromParent();
841   return Changed;
842 }
843 
844 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
845                                             const DataLayout &DL,
846                                             VAStartInst *Inst) {
847   // Only removing va_start instructions that are not in variadic functions.
848   // Those would be rejected by the IR verifier before this pass.
849   // After splicing basic blocks from a variadic function into a fixed arity
850   // one the va_start that used to refer to the ... parameter still exist.
851   // There are also variadic functions that this pass did not change and
852   // va_start instances in the created single block wrapper functions.
853   // Replace exactly the instances in non-variadic functions as those are
854   // the ones to be fixed up to use the va_list passed as the final argument.
855 
856   Function *ContainingFunction = Inst->getFunction();
857   if (ContainingFunction->isVarArg()) {
858     return false;
859   }
860 
861   // The last argument is a vaListParameterType, either a va_list
862   // or a pointer to one depending on the target.
863   bool PassedByValue = ABI->vaListPassedInSSARegister();
864   Argument *PassedVaList =
865       ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
866 
867   // va_start takes a pointer to a va_list, e.g. one on the stack
868   Value *VaStartArg = Inst->getArgList();
869 
870   Builder.SetInsertPoint(Inst);
871 
872   if (PassedByValue) {
873     // The general thing to do is create an alloca, store the va_list argument
874     // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to a
875     // store to the VaStartArg.
876     assert(ABI->vaCopyIsMemcpy());
877     Builder.CreateStore(PassedVaList, VaStartArg);
878   } else {
879 
880     // Otherwise emit a vacopy to pick up target-specific handling if any
881     auto &Ctx = Builder.getContext();
882 
883     Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
884                             {VaStartArg, PassedVaList});
885   }
886 
887   Inst->eraseFromParent();
888   return true;
889 }
890 
891 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
892                                             VAEndInst *Inst) {
893   assert(ABI->vaEndIsNop());
894   Inst->eraseFromParent();
895   return true;
896 }
897 
898 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
899                                             const DataLayout &DL,
900                                             VACopyInst *Inst) {
901   assert(ABI->vaCopyIsMemcpy());
902   Builder.SetInsertPoint(Inst);
903 
904   auto &Ctx = Builder.getContext();
905   Type *VaListTy = ABI->vaListType(Ctx);
906   uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
907 
908   Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
909                        Builder.getInt32(Size));
910 
911   Inst->eraseFromParent();
912   return true;
913 }
914 
915 struct Amdgpu final : public VariadicABIInfo {
916 
917   bool enableForTarget() override { return true; }
918 
919   bool vaListPassedInSSARegister() override { return true; }
920 
921   Type *vaListType(LLVMContext &Ctx) override {
922     return PointerType::getUnqual(Ctx);
923   }
924 
925   Type *vaListParameterType(Module &M) override {
926     return PointerType::getUnqual(M.getContext());
927   }
928 
929   Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
930                           AllocaInst * /*va_list*/, Value *Buffer) override {
931     // Given Buffer, which is an AllocInst of vararg_buffer
932     // need to return something usable as parameter type
933     return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
934   }
935 
936   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
937     return {Align(4), false};
938   }
939 };
940 
941 struct NVPTX final : public VariadicABIInfo {
942 
943   bool enableForTarget() override { return true; }
944 
945   bool vaListPassedInSSARegister() override { return true; }
946 
947   Type *vaListType(LLVMContext &Ctx) override {
948     return PointerType::getUnqual(Ctx);
949   }
950 
951   Type *vaListParameterType(Module &M) override {
952     return PointerType::getUnqual(M.getContext());
953   }
954 
955   Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
956                           AllocaInst *, Value *Buffer) override {
957     return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
958   }
959 
960   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
961     // NVPTX expects natural alignment in all cases. The variadic call ABI will
962     // handle promoting types to their appropriate size and alignment.
963     Align A = DL.getABITypeAlign(Parameter);
964     return {A, false};
965   }
966 };
967 
968 struct Wasm final : public VariadicABIInfo {
969 
970   bool enableForTarget() override {
971     // Currently wasm is only used for testing.
972     return commandLineOverride();
973   }
974 
975   bool vaListPassedInSSARegister() override { return true; }
976 
977   Type *vaListType(LLVMContext &Ctx) override {
978     return PointerType::getUnqual(Ctx);
979   }
980 
981   Type *vaListParameterType(Module &M) override {
982     return PointerType::getUnqual(M.getContext());
983   }
984 
985   Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
986                           AllocaInst * /*va_list*/, Value *Buffer) override {
987     return Buffer;
988   }
989 
990   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
991     LLVMContext &Ctx = Parameter->getContext();
992     const unsigned MinAlign = 4;
993     Align A = DL.getABITypeAlign(Parameter);
994     if (A < MinAlign)
995       A = Align(MinAlign);
996 
997     if (auto *S = dyn_cast<StructType>(Parameter)) {
998       if (S->getNumElements() > 1) {
999         return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
1000       }
1001     }
1002 
1003     return {A, false};
1004   }
1005 };
1006 
1007 std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
1008   switch (T.getArch()) {
1009   case Triple::r600:
1010   case Triple::amdgcn: {
1011     return std::make_unique<Amdgpu>();
1012   }
1013 
1014   case Triple::wasm32: {
1015     return std::make_unique<Wasm>();
1016   }
1017 
1018   case Triple::nvptx:
1019   case Triple::nvptx64: {
1020     return std::make_unique<NVPTX>();
1021   }
1022 
1023   default:
1024     return {};
1025   }
1026 }
1027 
1028 } // namespace
1029 
1030 char ExpandVariadics::ID = 0;
1031 
1032 INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
1033                 false)
1034 
1035 ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
1036   return new ExpandVariadics(M);
1037 }
1038 
1039 PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
1040   return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
1041                                               : PreservedAnalyses::all();
1042 }
1043 
1044 ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
1045