xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/ExpandVariadics.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is an optimization pass for variadic functions. If called from codegen,
10 // it can serve as the implementation of variadic functions for a given target.
11 //
12 // The strategy is to turn the ... part of a variadic function into a va_list
13 // and fix up the call sites. The majority of the pass is target independent.
14 // The exceptions are the va_list type itself and the rules for where to store
15 // variables in memory such that va_arg can iterate over them given a va_list.
16 //
17 // The majority of the plumbing is splitting the variadic function into a
18 // single basic block that packs the variadic arguments into a va_list and
19 // a second function that does the work of the original. That packing is
20 // exactly what is done by va_start. Further, the transform from ... to va_list
21 // replaced va_start with an operation to copy a va_list from the new argument,
22 // which is exactly a va_copy. This is useful for reducing target-dependence.
23 //
24 // A va_list instance is a forward iterator, where the primary operation va_arg
25 // is dereference-then-increment. This interface forces significant convergent
26 // evolution between target specific implementations. The variation in runtime
27 // data layout is limited to that representable by the iterator, parameterised
28 // by the type passed to the va_arg instruction.
29 //
30 // Therefore the majority of the target specific subtlety is packing arguments
31 // into a stack allocated buffer such that a va_list can be initialised with it
32 // and the va_arg expansion for the target will find the arguments at runtime.
33 //
34 // The aggregate effect is to unblock other transforms, most critically the
35 // general purpose inliner. Known calls to variadic functions become zero cost.
36 //
37 // Consistency with clang is primarily tested by emitting va_arg using clang
38 // then expanding the variadic functions using this pass, followed by trying
39 // to constant fold the functions to no-ops.
40 //
41 // Target specific behaviour is tested in IR - mainly checking that values are
42 // put into positions in call frames that make sense for that particular target.
43 //
44 // There is one "clever" invariant in use. va_start intrinsics that are not
45 // within a variadic function are an error in the IR verifier. When this
46 // transform moves blocks from a variadic function into a fixed arity one, it
47 // moves va_start intrinsics along with everything else. That means that the
48 // va_start intrinsics that need to be rewritten to use the trailing argument
49 // are exactly those that are in non-variadic functions so no further state
50 // is needed to distinguish those that need to be rewritten.
51 //
52 //===----------------------------------------------------------------------===//
53 
54 #include "llvm/Transforms/IPO/ExpandVariadics.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/IR/Constants.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/IntrinsicInst.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
63 #include "llvm/Support/CommandLine.h"
64 #include "llvm/TargetParser/Triple.h"
65 #include "llvm/Transforms/Utils/ModuleUtils.h"
66 
67 #define DEBUG_TYPE "expand-variadics"
68 
69 using namespace llvm;
70 
71 namespace {
72 
// Command line override for the pass mode. "unspecified" defers to the
// default chosen at pass construction time; any other value forces that
// behaviour regardless of how the pass was created.
cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
    DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
    cl::init(ExpandVariadicsMode::Unspecified),
    cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
                          "Use the implementation defaults"),
               clEnumValN(ExpandVariadicsMode::Disable, "disable",
                          "Disable the pass entirely"),
               clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
                          "Optimise without changing ABI"),
               clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
                          "Change variadic calling convention")));
84 
85 bool commandLineOverride() {
86   return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
87 }
88 
// Instances of this class encapsulate the target-dependent behaviour as a
// function of triple. Implementing a new ABI is adding a case to the switch
// in create(llvm::Triple) at the end of this file.
// This class may end up instantiated in TargetMachine instances, keeping it
// here for now until enough targets are implemented for the API to evolve.
class VariadicABIInfo {
protected:
  VariadicABIInfo() = default;

public:
  // Factory keyed on the triple; callers must handle a null result for
  // targets with no implementation.
  static std::unique_ptr<VariadicABIInfo> create(const Triple &T);

  // Allow overriding whether the pass runs on a per-target basis
  virtual bool enableForTarget() = 0;

  // Whether a valist instance is passed by value or by address
  // I.e. does it need to be alloca'ed and stored into, or can
  // it be passed directly in a SSA register
  virtual bool vaListPassedInSSARegister() = 0;

  // The type of a va_list iterator object
  virtual Type *vaListType(LLVMContext &Ctx) = 0;

  // The type of a va_list as a function argument as lowered by C
  virtual Type *vaListParameterType(Module &M) = 0;

  // Initialize an allocated va_list object to point to an already
  // initialized contiguous memory region.
  // Return the value to pass as the va_list argument
  virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
                                  IRBuilder<> &Builder, AllocaInst *VaList,
                                  Value *Buffer) = 0;

  // Layout constraints for a single variadic argument slot in the call frame.
  struct VAArgSlotInfo {
    Align DataAlign; // With respect to the call frame
    bool Indirect;   // Passed via a pointer
  };
  virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;

  // Targets implemented so far all have the same trivial lowering for these
  bool vaEndIsNop() { return true; }
  bool vaCopyIsMemcpy() { return true; }

  virtual ~VariadicABIInfo() = default;
};
134 
class ExpandVariadics : public ModulePass {

  // The pass construction sets the default to optimize when called from middle
  // end and lowering when called from the backend. The command line variable
  // overrides that. This is useful for testing and debugging. It also allows
  // building applications with variadic functions wholly removed if one
  // has sufficient control over the dependencies, e.g. a statically linked
  // clang that has no variadic function calls remaining in the binary.

public:
  static char ID;
  // Effective mode: the constructor argument unless overridden on the command
  // line (see commandLineOverride()).
  const ExpandVariadicsMode Mode;
  // Target specific behaviour, created per-module in runOnModule.
  std::unique_ptr<VariadicABIInfo> ABI;

  ExpandVariadics(ExpandVariadicsMode Mode)
      : ModulePass(ID),
        Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}

  StringRef getPassName() const override { return "Expand variadic functions"; }

  // Lowering mode is allowed to change the external ABI; optimize mode is not.
  bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }

  bool runOnModule(Module &M) override;

  bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);

  // Declare a new function with the same type as OriginalFunction and point
  // every existing use of the original at the new declaration.
  Function *replaceAllUsesWithNewDeclaration(Module &M,
                                             Function *OriginalFunction);

  // Create the fixed-arity function that takes a trailing va_list in place of
  // the ... and receives the original implementation.
  Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
                                        Function *OriginalFunction);

  // Give VariadicWrapper a single-block body that packs its ... arguments
  // into a va_list and forwards the call to FixedArityReplacement.
  Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
                                  Function *VariadicWrapper,
                                  Function *FixedArityReplacement);

  // Rewrite a single call site to pass a packed va_list instead of trailing
  // variadic arguments; NF may be null for indirect calls.
  bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
                  Function *NF);

  // The intrinsic functions va_copy and va_end are removed unconditionally.
  // They correspond to a memcpy and a no-op on all implemented targets.
  // The va_start intrinsic is removed from basic blocks that were not created
  // by this pass, some may remain if needed to maintain the external ABI.

  template <Intrinsic::ID ID, typename InstructionType>
  bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
                            PointerType *IntrinsicArgType) {
    bool Changed = false;
    const DataLayout &DL = M.getDataLayout();
    if (Function *Intrinsic =
            Intrinsic::getDeclarationIfExists(&M, ID, {IntrinsicArgType})) {
      for (User *U : make_early_inc_range(Intrinsic->users()))
        if (auto *I = dyn_cast<InstructionType>(U))
          Changed |= expandVAIntrinsicCall(Builder, DL, I);

      // Drop the declaration once all users have been rewritten.
      if (Intrinsic->use_empty())
        Intrinsic->eraseFromParent();
    }
    return Changed;
  }

  // Expand users of all three va_* intrinsics for one pointer address space.
  bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
                                           unsigned Addrspace) {
    auto &Ctx = M.getContext();
    PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
    bool Changed = false;

    // expand vastart before vacopy as vastart may introduce a vacopy
    Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
        M, Builder, IntrinsicArgType);
    Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
        M, Builder, IntrinsicArgType);
    Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
        M, Builder, IntrinsicArgType);
    return Changed;
  }

  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
                             VAStartInst *Inst);

  bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
                             VAEndInst *Inst);

  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
                             VACopyInst *Inst);

  FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
    // The type of "FTy" with the ... removed and a va_list appended
    SmallVector<Type *> ArgTypes(FTy->params());
    ArgTypes.push_back(ABI->vaListParameterType(M));
    return FunctionType::get(FTy->getReturnType(), ArgTypes,
                             /*IsVarArgs=*/false);
  }

  // i64 constant holding Alloced's allocation size, or zero when the size is
  // not a compile time constant (used for lifetime markers).
  static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
                                   AllocaInst *Alloced) {
    std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
    uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
    return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
  }

  // Whether F itself is a candidate for the wrapper + fixed-arity split.
  bool expansionApplicableToFunction(Module &M, Function *F) {
    if (F->isIntrinsic() || !F->isVarArg() ||
        F->hasFnAttribute(Attribute::Naked))
      return false;

    if (F->getCallingConv() != CallingConv::C)
      return false;

    // When rewriting the ABI, declarations are rewritten too.
    if (rewriteABI())
      return true;

    // When only optimizing, the external interface must be preserved, so the
    // definition must be exact (not weak/interposable).
    if (!F->hasExactDefinition())
      return false;

    return true;
  }

  // Whether a specific call site can be rewritten by this pass.
  bool expansionApplicableToFunctionCall(CallBase *CB) {
    if (CallInst *CI = dyn_cast<CallInst>(CB)) {
      if (CI->isMustTailCall()) {
        // Cannot expand musttail calls
        return false;
      }

      if (CI->getCallingConv() != CallingConv::C)
        return false;

      return true;
    }

    if (isa<InvokeInst>(CB)) {
      // Invoke not implemented in initial implementation of pass
      return false;
    }

    // Other unimplemented derivative of CallBase
    return false;
  }

  class ExpandedCallFrame {
    // Helper for constructing an alloca instance containing the arguments bound
    // to the variadic ... parameter, rearranged to allow indexing through a
    // va_list iterator
    enum { N = 4 };
    SmallVector<Type *, N> FieldTypes;
    enum Tag { Store, Memcpy, Padding };
    SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;

    template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
      FieldTypes.push_back(FieldType);
      Source.push_back({V, Bytes, tag});
    }

  public:
    // Record a value to be written into the frame with a plain store.
    void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }

    // Record a value (e.g. a byval aggregate) to be copied in with a memcpy.
    void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
      append<Memcpy>(T, V, Bytes);
    }

    // Record By bytes of explicit padding for field alignment.
    void padding(LLVMContext &Ctx, uint64_t By) {
      append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
    }

    size_t size() const { return FieldTypes.size(); }
    bool empty() const { return FieldTypes.empty(); }

    // Materialise the accumulated layout as a packed struct; padding is
    // explicit in the field list, so packing keeps the offsets exact.
    StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
      const bool IsPacked = true;
      return StructType::create(Ctx, FieldTypes,
                                (Twine(Name) + ".vararg").str(), IsPacked);
    }

    // Emit the stores/memcpys that populate an alloca of the asStruct() type.
    void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
                                AllocaInst *Alloced) {

      StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());

      for (size_t I = 0; I < size(); I++) {

        auto [V, bytes, tag] = Source[I];

        if (tag == Padding) {
          assert(V == nullptr);
          continue;
        }

        auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);

        assert(V != nullptr);

        if (tag == Store)
          Builder.CreateStore(V, Dst);

        if (tag == Memcpy)
          Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
      }
    }
  };
};
336 
// Entry point: split variadic functions, expand known call sites, lower the
// va_* intrinsics, and (in lowering mode only) rewrite indirect variadic
// calls found by scanning every instruction.
bool ExpandVariadics::runOnModule(Module &M) {
  bool Changed = false;
  if (Mode == ExpandVariadicsMode::Disable)
    return Changed;

  // Target specific behaviour is a function of the triple; bail out for
  // targets with no implementation or with the pass disabled.
  Triple TT(M.getTargetTriple());
  ABI = VariadicABIInfo::create(TT);
  if (!ABI)
    return Changed;

  if (!ABI->enableForTarget())
    return Changed;

  auto &Ctx = M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> Builder(Ctx);

  // Lowering needs to run on all functions exactly once.
  // Optimize could run on functions containing va_start exactly once.
  for (Function &F : make_early_inc_range(M))
    Changed |= runOnFunction(M, Builder, &F);

  // After runOnFunction, all known calls to known variadic functions have been
  // replaced. va_start intrinsics are presently (and invalidly!) only present
  // in functions that used to be variadic and have now been replaced to take a
  // va_list instead. If lowering as opposed to optimising, calls to unknown
  // variadic functions have also been replaced.

  {
    // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
    unsigned Addrspace = 0;
    Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);

    Addrspace = DL.getAllocaAddrSpace();
    if (Addrspace != 0)
      Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
  }

  if (Mode != ExpandVariadicsMode::Lowering)
    return Changed;

  for (Function &F : make_early_inc_range(M)) {
    if (F.isDeclaration())
      continue;

    // Now need to track down indirect calls. Can't find those
    // by walking uses of variadic functions, need to crawl the instruction
    // stream. Fortunately this is only necessary for the ABI rewrite case.
    for (BasicBlock &BB : F) {
      for (Instruction &I : make_early_inc_range(BB)) {
        if (CallBase *CB = dyn_cast<CallBase>(&I)) {
          if (CB->isIndirectCall()) {
            FunctionType *FTy = CB->getFunctionType();
            if (FTy->isVarArg())
              Changed |= expandCall(M, Builder, CB, FTy, 0);
          }
        }
      }
    }
  }

  return Changed;
}
400 
// Split one variadic function into a forwarding wrapper plus a fixed-arity
// implementation taking a va_list, then redirect known call sites. Which of
// the two replaces the original externally depends on rewriteABI().
bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
                                    Function *OriginalFunction) {
  bool Changed = false;

  if (!expansionApplicableToFunction(M, OriginalFunction))
    return Changed;

  [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
      OriginalFunction->isDeclaration();
  assert(rewriteABI() || !OriginalFunctionIsDeclaration);

  // Declare a new function and redirect every use to that new function
  Function *VariadicWrapper =
      replaceAllUsesWithNewDeclaration(M, OriginalFunction);
  assert(VariadicWrapper->isDeclaration());
  assert(OriginalFunction->use_empty());

  // Create a new function taking va_list containing the implementation of the
  // original
  Function *FixedArityReplacement =
      deriveFixedArityReplacement(M, Builder, OriginalFunction);
  assert(OriginalFunction->isDeclaration());
  assert(FixedArityReplacement->isDeclaration() ==
         OriginalFunctionIsDeclaration);
  assert(VariadicWrapper->isDeclaration());

  // Create a single block forwarding wrapper that turns a ... into a va_list
  [[maybe_unused]] Function *VariadicWrapperDefine =
      defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
  assert(VariadicWrapperDefine == VariadicWrapper);
  assert(!VariadicWrapper->isDeclaration());

  // We now have:
  // 1. the original function, now as a declaration with no uses
  // 2. a variadic function that unconditionally calls a fixed arity replacement
  // 3. a fixed arity function equivalent to the original function

  // Replace known calls to the variadic with calls to the va_list equivalent
  for (User *U : make_early_inc_range(VariadicWrapper->users())) {
    if (CallBase *CB = dyn_cast<CallBase>(U)) {
      Value *CalledOperand = CB->getCalledOperand();
      // Only rewrite direct calls; skip e.g. uses as a call argument.
      if (VariadicWrapper == CalledOperand)
        Changed |=
            expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
                       FixedArityReplacement);
    }
  }

  // The original function will be erased.
  // One of the two new functions will become a replacement for the original.
  // When preserving the ABI, the other is an internal implementation detail.
  // When rewriting the ABI, RAUW then the variadic one.
  Function *const ExternallyAccessible =
      rewriteABI() ? FixedArityReplacement : VariadicWrapper;
  Function *const InternalOnly =
      rewriteABI() ? VariadicWrapper : FixedArityReplacement;

  // The external function is the replacement for the original
  ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
  ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
  ExternallyAccessible->setComdat(OriginalFunction->getComdat());
  ExternallyAccessible->takeName(OriginalFunction);

  // Annotate the internal one as internal
  InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
  InternalOnly->setLinkage(GlobalValue::InternalLinkage);

  // The original is unused and obsolete
  OriginalFunction->eraseFromParent();

  InternalOnly->removeDeadConstantUsers();

  if (rewriteABI()) {
    // All known calls to the function have been removed by expandCall
    // Resolve everything else by replaceAllUsesWith
    VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
    VariadicWrapper->eraseFromParent();
  }

  return Changed;
}
482 
483 Function *
484 ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
485                                                   Function *OriginalFunction) {
486   auto &Ctx = M.getContext();
487   Function &F = *OriginalFunction;
488   FunctionType *FTy = F.getFunctionType();
489   Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
490 
491   NF->setName(F.getName() + ".varargs");
492 
493   F.getParent()->getFunctionList().insert(F.getIterator(), NF);
494 
495   AttrBuilder ParamAttrs(Ctx);
496   AttributeList Attrs = NF->getAttributes();
497   Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
498   NF->setAttributes(Attrs);
499 
500   OriginalFunction->replaceAllUsesWith(NF);
501   return NF;
502 }
503 
504 Function *
505 ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
506                                              Function *OriginalFunction) {
507   Function &F = *OriginalFunction;
508   // The purpose here is split the variadic function F into two functions
509   // One is a variadic function that bundles the passed argument into a va_list
510   // and passes it to the second function. The second function does whatever
511   // the original F does, except that it takes a va_list instead of the ...
512 
513   assert(expansionApplicableToFunction(M, &F));
514 
515   auto &Ctx = M.getContext();
516 
517   // Returned value isDeclaration() is equal to F.isDeclaration()
518   // but that property is not invariant throughout this function
519   const bool FunctionIsDefinition = !F.isDeclaration();
520 
521   FunctionType *FTy = F.getFunctionType();
522   SmallVector<Type *> ArgTypes(FTy->params());
523   ArgTypes.push_back(ABI->vaListParameterType(M));
524 
525   FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
526   Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
527 
528   // Note - same attribute handling as DeadArgumentElimination
529   NF->copyAttributesFrom(&F);
530   NF->setComdat(F.getComdat());
531   F.getParent()->getFunctionList().insert(F.getIterator(), NF);
532   NF->setName(F.getName() + ".valist");
533 
534   AttrBuilder ParamAttrs(Ctx);
535 
536   AttributeList Attrs = NF->getAttributes();
537   Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
538   NF->setAttributes(Attrs);
539 
540   // Splice the implementation into the new function with minimal changes
541   if (FunctionIsDefinition) {
542     NF->splice(NF->begin(), &F);
543 
544     auto NewArg = NF->arg_begin();
545     for (Argument &Arg : F.args()) {
546       Arg.replaceAllUsesWith(NewArg);
547       NewArg->setName(Arg.getName()); // takeName without killing the old one
548       ++NewArg;
549     }
550     NewArg->setName("varargs");
551   }
552 
553   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
554   F.getAllMetadata(MDs);
555   for (auto [KindID, Node] : MDs)
556     NF->addMetadata(KindID, *Node);
557   F.clearMetadata();
558 
559   return NF;
560 }
561 
// Emit the single-block body of the variadic wrapper: alloca a va_list,
// va_start it, forward the fixed arguments plus the va_list to the fixed
// arity replacement, va_end, and return the result.
Function *
ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
                                       Function *VariadicWrapper,
                                       Function *FixedArityReplacement) {
  auto &Ctx = Builder.getContext();
  const DataLayout &DL = M.getDataLayout();
  assert(VariadicWrapper->isDeclaration());
  Function &F = *VariadicWrapper;

  assert(F.isDeclaration());
  Type *VaListTy = ABI->vaListType(Ctx);

  auto *BB = BasicBlock::Create(Ctx, "entry", &F);
  Builder.SetInsertPoint(BB);

  AllocaInst *VaListInstance =
      Builder.CreateAlloca(VaListTy, nullptr, "va_start");

  Builder.CreateLifetimeStart(VaListInstance,
                              sizeOfAlloca(Ctx, DL, VaListInstance));

  // This va_start is in a (now) non-variadic function; it is rewritten later
  // by expandVAIntrinsicCall to read from the trailing va_list argument.
  Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
                          {VaListInstance});

  SmallVector<Value *> Args(llvm::make_pointer_range(F.args()));

  // Pass the va_list either by value (loaded from the alloca) or by address,
  // as dictated by the target ABI.
  Type *ParameterType = ABI->vaListParameterType(M);
  if (ABI->vaListPassedInSSARegister())
    Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
  else
    Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));

  CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);

  Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
                          {VaListInstance});
  Builder.CreateLifetimeEnd(VaListInstance,
                            sizeOfAlloca(Ctx, DL, VaListInstance));

  if (Result->getType()->isVoidTy())
    Builder.CreateRetVoid();
  else
    Builder.CreateRet(Result);

  return VariadicWrapper;
}
608 
609 bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
610                                  FunctionType *VarargFunctionType,
611                                  Function *NF) {
612   bool Changed = false;
613   const DataLayout &DL = M.getDataLayout();
614 
615   if (!expansionApplicableToFunctionCall(CB)) {
616     if (rewriteABI())
617       report_fatal_error("Cannot lower callbase instruction");
618     return Changed;
619   }
620 
621   // This is tricky. The call instruction's function type might not match
622   // the type of the caller. When optimising, can leave it unchanged.
623   // Webassembly detects that inconsistency and repairs it.
624   FunctionType *FuncType = CB->getFunctionType();
625   if (FuncType != VarargFunctionType) {
626     if (!rewriteABI())
627       return Changed;
628     FuncType = VarargFunctionType;
629   }
630 
631   auto &Ctx = CB->getContext();
632 
633   Align MaxFieldAlign(1);
634 
635   // The strategy is to allocate a call frame containing the variadic
636   // arguments laid out such that a target specific va_list can be initialized
637   // with it, such that target specific va_arg instructions will correctly
638   // iterate over it. This means getting the alignment right and sometimes
639   // embedding a pointer to the value instead of embedding the value itself.
640 
641   Function *CBF = CB->getParent()->getParent();
642 
643   ExpandedCallFrame Frame;
644 
645   uint64_t CurrentOffset = 0;
646 
647   for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
648     Value *ArgVal = CB->getArgOperand(I);
649     const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
650     const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);
651 
652     // The type of the value being passed, decoded from byval/byref metadata if
653     // required
654     Type *const UnderlyingType = IsByVal   ? CB->getParamByValType(I)
655                                  : IsByRef ? CB->getParamByRefType(I)
656                                            : ArgVal->getType();
657     const uint64_t UnderlyingSize =
658         DL.getTypeAllocSize(UnderlyingType).getFixedValue();
659 
660     // The type to be written into the call frame
661     Type *FrameFieldType = UnderlyingType;
662 
663     // The value to copy from when initialising the frame alloca
664     Value *SourceValue = ArgVal;
665 
666     VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);
667 
668     if (SlotInfo.Indirect) {
669       // The va_arg lowering loads through a pointer. Set up an alloca to aim
670       // that pointer at.
671       Builder.SetInsertPointPastAllocas(CBF);
672       Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
673       Value *CallerCopy =
674           Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
675 
676       Builder.SetInsertPoint(CB);
677       if (IsByVal)
678         Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
679       else
680         Builder.CreateStore(ArgVal, CallerCopy);
681 
682       // Indirection now handled, pass the alloca ptr by value
683       FrameFieldType = DL.getAllocaPtrType(Ctx);
684       SourceValue = CallerCopy;
685     }
686 
687     // Alignment of the value within the frame
688     // This probably needs to be controllable as a function of type
689     Align DataAlign = SlotInfo.DataAlign;
690 
691     MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
692 
693     uint64_t DataAlignV = DataAlign.value();
694     if (uint64_t Rem = CurrentOffset % DataAlignV) {
695       // Inject explicit padding to deal with alignment requirements
696       uint64_t Padding = DataAlignV - Rem;
697       Frame.padding(Ctx, Padding);
698       CurrentOffset += Padding;
699     }
700 
701     if (SlotInfo.Indirect) {
702       Frame.store(Ctx, FrameFieldType, SourceValue);
703     } else {
704       if (IsByVal)
705         Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
706       else
707         Frame.store(Ctx, FrameFieldType, SourceValue);
708     }
709 
710     CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
711   }
712 
713   if (Frame.empty()) {
714     // Not passing any arguments, hopefully va_arg won't try to read any
715     // Creating a single byte frame containing nothing to point the va_list
716     // instance as that is less special-casey in the compiler and probably
717     // easier to interpret in a debugger.
718     Frame.padding(Ctx, 1);
719   }
720 
721   StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
722 
723   // The struct instance needs to be at least MaxFieldAlign for the alignment of
724   // the fields to be correct at runtime. Use the native stack alignment instead
725   // if that's greater as that tends to give better codegen.
726   // This is an awkward way to guess whether there is a known stack alignment
727   // without hitting an assert in DL.getStackAlignment, 1024 is an arbitrary
728   // number likely to be greater than the natural stack alignment.
729   Align AllocaAlign = MaxFieldAlign;
730   if (MaybeAlign StackAlign = DL.getStackAlignment();
731       StackAlign && *StackAlign > AllocaAlign)
732     AllocaAlign = *StackAlign;
733 
734   // Put the alloca to hold the variadic args in the entry basic block.
735   Builder.SetInsertPointPastAllocas(CBF);
736 
737   // SetCurrentDebugLocation when the builder SetInsertPoint method does not
738   Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
739 
740   // The awkward construction here is to set the alignment on the instance
741   AllocaInst *Alloced = Builder.Insert(
742       new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
743       "vararg_buffer");
744   Changed = true;
745   assert(Alloced->getAllocatedType() == VarargsTy);
746 
747   // Initialize the fields in the struct
748   Builder.SetInsertPoint(CB);
749   Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
750   Frame.initializeStructAlloca(DL, Builder, Alloced);
751 
752   const unsigned NumArgs = FuncType->getNumParams();
753   SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);
754 
755   // Initialize a va_list pointing to that struct and pass it as the last
756   // argument
757   AllocaInst *VaList = nullptr;
758   {
759     if (!ABI->vaListPassedInSSARegister()) {
760       Type *VaListTy = ABI->vaListType(Ctx);
761       Builder.SetInsertPointPastAllocas(CBF);
762       Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
763       VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
764       Builder.SetInsertPoint(CB);
765       Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
766     }
767     Builder.SetInsertPoint(CB);
768     Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
769   }
770 
771   // Attributes excluding any on the vararg arguments
772   AttributeList PAL = CB->getAttributes();
773   if (!PAL.isEmpty()) {
774     SmallVector<AttributeSet, 8> ArgAttrs;
775     for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
776       ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
777     PAL =
778         AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
779   }
780 
781   SmallVector<OperandBundleDef, 1> OpBundles;
782   CB->getOperandBundlesAsDefs(OpBundles);
783 
784   CallBase *NewCB = nullptr;
785 
786   if (CallInst *CI = dyn_cast<CallInst>(CB)) {
787     Value *Dst = NF ? NF : CI->getCalledOperand();
788     FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
789 
790     NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI->getIterator());
791 
792     CallInst::TailCallKind TCK = CI->getTailCallKind();
793     assert(TCK != CallInst::TCK_MustTail);
794 
795     // Can't tail call a function that is being passed a pointer to an alloca
796     if (TCK == CallInst::TCK_Tail)
797       TCK = CallInst::TCK_None;
798     CI->setTailCallKind(TCK);
799 
800   } else {
801     llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
802   }
803 
804   if (VaList)
805     Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
806 
807   Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
808 
809   NewCB->setAttributes(PAL);
810   NewCB->takeName(CB);
811   NewCB->setCallingConv(CB->getCallingConv());
812   NewCB->setDebugLoc(DebugLoc());
813 
814   // DeadArgElim and ArgPromotion copy exactly this metadata
815   NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
816 
817   CB->replaceAllUsesWith(NewCB);
818   CB->eraseFromParent();
819   return Changed;
820 }
821 
822 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
823                                             const DataLayout &DL,
824                                             VAStartInst *Inst) {
825   // Only removing va_start instructions that are not in variadic functions.
826   // Those would be rejected by the IR verifier before this pass.
827   // After splicing basic blocks from a variadic function into a fixed arity
828   // one the va_start that used to refer to the ... parameter still exist.
829   // There are also variadic functions that this pass did not change and
830   // va_start instances in the created single block wrapper functions.
831   // Replace exactly the instances in non-variadic functions as those are
832   // the ones to be fixed up to use the va_list passed as the final argument.
833 
834   Function *ContainingFunction = Inst->getFunction();
835   if (ContainingFunction->isVarArg()) {
836     return false;
837   }
838 
839   // The last argument is a vaListParameterType, either a va_list
840   // or a pointer to one depending on the target.
841   bool PassedByValue = ABI->vaListPassedInSSARegister();
842   Argument *PassedVaList =
843       ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
844 
845   // va_start takes a pointer to a va_list, e.g. one on the stack
846   Value *VaStartArg = Inst->getArgList();
847 
848   Builder.SetInsertPoint(Inst);
849 
850   if (PassedByValue) {
851     // The general thing to do is create an alloca, store the va_list argument
852     // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to a
853     // store to the VaStartArg.
854     assert(ABI->vaCopyIsMemcpy());
855     Builder.CreateStore(PassedVaList, VaStartArg);
856   } else {
857 
858     // Otherwise emit a vacopy to pick up target-specific handling if any
859     auto &Ctx = Builder.getContext();
860 
861     Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
862                             {VaStartArg, PassedVaList});
863   }
864 
865   Inst->eraseFromParent();
866   return true;
867 }
868 
// va_end expansion. For every ABI this pass currently enables, tearing down
// a va_list requires no code, so the intrinsic call is simply deleted.
bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
                                            VAEndInst *Inst) {
  // The target ABI must have declared va_end to be a no-op.
  assert(ABI->vaEndIsNop());
  Inst->eraseFromParent();
  return true;
}
875 
876 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
877                                             const DataLayout &DL,
878                                             VACopyInst *Inst) {
879   assert(ABI->vaCopyIsMemcpy());
880   Builder.SetInsertPoint(Inst);
881 
882   auto &Ctx = Builder.getContext();
883   Type *VaListTy = ABI->vaListType(Ctx);
884   uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
885 
886   Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
887                        Builder.getInt32(Size));
888 
889   Inst->eraseFromParent();
890   return true;
891 }
892 
893 struct Amdgpu final : public VariadicABIInfo {
894 
895   bool enableForTarget() override { return true; }
896 
897   bool vaListPassedInSSARegister() override { return true; }
898 
899   Type *vaListType(LLVMContext &Ctx) override {
900     return PointerType::getUnqual(Ctx);
901   }
902 
903   Type *vaListParameterType(Module &M) override {
904     return PointerType::getUnqual(M.getContext());
905   }
906 
907   Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
908                           AllocaInst * /*va_list*/, Value *Buffer) override {
909     // Given Buffer, which is an AllocInst of vararg_buffer
910     // need to return something usable as parameter type
911     return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
912   }
913 
914   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
915     return {Align(4), false};
916   }
917 };
918 
919 struct NVPTX final : public VariadicABIInfo {
920 
921   bool enableForTarget() override { return true; }
922 
923   bool vaListPassedInSSARegister() override { return true; }
924 
925   Type *vaListType(LLVMContext &Ctx) override {
926     return PointerType::getUnqual(Ctx);
927   }
928 
929   Type *vaListParameterType(Module &M) override {
930     return PointerType::getUnqual(M.getContext());
931   }
932 
933   Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
934                           AllocaInst *, Value *Buffer) override {
935     return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
936   }
937 
938   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
939     // NVPTX expects natural alignment in all cases. The variadic call ABI will
940     // handle promoting types to their appropriate size and alignment.
941     Align A = DL.getABITypeAlign(Parameter);
942     return {A, false};
943   }
944 };
945 
946 struct Wasm final : public VariadicABIInfo {
947 
948   bool enableForTarget() override {
949     // Currently wasm is only used for testing.
950     return commandLineOverride();
951   }
952 
953   bool vaListPassedInSSARegister() override { return true; }
954 
955   Type *vaListType(LLVMContext &Ctx) override {
956     return PointerType::getUnqual(Ctx);
957   }
958 
959   Type *vaListParameterType(Module &M) override {
960     return PointerType::getUnqual(M.getContext());
961   }
962 
963   Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
964                           AllocaInst * /*va_list*/, Value *Buffer) override {
965     return Buffer;
966   }
967 
968   VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
969     LLVMContext &Ctx = Parameter->getContext();
970     const unsigned MinAlign = 4;
971     Align A = DL.getABITypeAlign(Parameter);
972     if (A < MinAlign)
973       A = Align(MinAlign);
974 
975     if (auto *S = dyn_cast<StructType>(Parameter)) {
976       if (S->getNumElements() > 1) {
977         return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
978       }
979     }
980 
981     return {A, false};
982   }
983 };
984 
985 std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
986   switch (T.getArch()) {
987   case Triple::r600:
988   case Triple::amdgcn: {
989     return std::make_unique<Amdgpu>();
990   }
991 
992   case Triple::wasm32: {
993     return std::make_unique<Wasm>();
994   }
995 
996   case Triple::nvptx:
997   case Triple::nvptx64: {
998     return std::make_unique<NVPTX>();
999   }
1000 
1001   default:
1002     return {};
1003   }
1004 }
1005 
1006 } // namespace
1007 
// Pass identification token used by the legacy pass manager.
char ExpandVariadics::ID = 0;

// Register the legacy pass; it is neither CFG-only nor an analysis.
INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
                false)
1012 
// Factory for the legacy-pass-manager wrapper, parameterised by the
// requested expansion mode. Ownership passes to the caller.
ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
  return new ExpandVariadics(M);
}
1016 
1017 PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
1018   return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
1019                                               : PreservedAnalyses::all();
1020 }
1021 
1022 ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
1023