1 //===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is an optimization pass for variadic functions. If called from codegen,
10 // it can serve as the implementation of variadic functions for a given target.
11 //
12 // The strategy is to turn the ... part of a variadic function into a va_list
13 // and fix up the call sites. The majority of the pass is target independent.
14 // The exceptions are the va_list type itself and the rules for where to store
15 // variables in memory such that va_arg can iterate over them given a va_list.
16 //
17 // The majority of the plumbing is splitting the variadic function into a
18 // single basic block that packs the variadic arguments into a va_list and
19 // a second function that does the work of the original. That packing is
20 // exactly what is done by va_start. Further, the transform from ... to va_list
21 // replaced va_start with an operation to copy a va_list from the new argument,
22 // which is exactly a va_copy. This is useful for reducing target-dependence.
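// As a rough illustration of the result when the original ABI is preserved
// (the names and C-like syntax are placeholders, not the exact emitted IR),
// a function such as
//   int dec(const char *fmt, ...) { /* body using va_start */ }
// becomes approximately
//   int dec.valist(const char *fmt, va_list va) { /* original body */ }
//   int dec(const char *fmt, ...) {
//     va_list va;
//     va_start(va, fmt);
//     int r = dec.valist(fmt, va);
//     va_end(va);
//     return r;
//   }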
23 //
24 // A va_list instance is a forward iterator, where the primary operation va_arg
25 // is dereference-then-increment. This interface forces significant convergent
26 // evolution between target specific implementations. The variation in runtime
27 // data layout is limited to that representable by the iterator, parameterised
28 // by the type passed to the va_arg instruction.
29 //
30 // Therefore the majority of the target specific subtlety is packing arguments
31 // into a stack allocated buffer such that a va_list can be initialised with it
32 // and the va_arg expansion for the target will find the arguments at runtime.
33 //
34 // The aggregate effect is to unblock other transforms, most critically the
35 // general purpose inliner. Known calls to variadic functions become zero cost.
36 //
37 // Consistency with clang is primarily tested by emitting va_arg using clang
38 // then expanding the variadic functions using this pass, followed by trying
39 // to constant fold the functions to no-ops.
40 //
41 // Target specific behaviour is tested in IR - mainly checking that values are
42 // put into positions in call frames that make sense for that particular target.
43 //
44 // There is one "clever" invariant in use. va_start intrinsics that are not
45 // within a variadic function are an error in the IR verifier. When this
46 // transform moves blocks from a variadic function into a fixed arity one, it
47 // moves va_start intrinsics along with everything else. That means that the
48 // va_start intrinsics that need to be rewritten to use the trailing argument
49 // are exactly those that are in non-variadic functions so no further state
50 // is needed to distinguish those that need to be rewritten.
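// For example, once the body of a variadic f is spliced into f.valist, any
// va_start inside that body now sits in a non-variadic function and is
// rewritten here to read from the trailing va_list argument, while the
// va_start emitted into the freshly created variadic wrapper is left alone.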
51 //
52 //===----------------------------------------------------------------------===//
53
54 #include "llvm/Transforms/IPO/ExpandVariadics.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/IR/Constants.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/IntrinsicInst.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
63 #include "llvm/Support/CommandLine.h"
64 #include "llvm/TargetParser/Triple.h"
65 #include "llvm/Transforms/Utils/ModuleUtils.h"
66
67 #define DEBUG_TYPE "expand-variadics"
68
69 using namespace llvm;
70
71 namespace {
72
73 cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
74 DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
75 cl::init(ExpandVariadicsMode::Unspecified),
76 cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
77 "Use the implementation defaults"),
78 clEnumValN(ExpandVariadicsMode::Disable, "disable",
79 "Disable the pass entirely"),
80 clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
81 "Optimise without changing ABI"),
82 clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
83 "Change variadic calling convention")));
84
85 bool commandLineOverride() {
86 return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
87 }
88
89 // Instances of this class encapsulate the target-dependent behaviour as a
90 // function of triple. Implementing a new ABI means adding a case to the switch
91 // in create(llvm::Triple) at the end of this file.
92 // This class may end up being instantiated in TargetMachine instances; it is
93 // kept here for now until enough targets are implemented for the API to evolve.
94 class VariadicABIInfo {
95 protected:
96 VariadicABIInfo() = default;
97
98 public:
99 static std::unique_ptr<VariadicABIInfo> create(const Triple &T);
100
101 // Allow overriding whether the pass runs on a per-target basis
102 virtual bool enableForTarget() = 0;
103
104 // Whether a va_list instance is passed by value or by address.
105 // I.e. does it need to be alloca'ed and stored into, or can
106 // it be passed directly in an SSA register
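// For the targets implemented at the end of this file the va_list is a plain
// pointer, so this returns true and the value is passed directly; a target
// with a struct-typed va_list would return false and have it passed via an
// alloca instead.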
107 virtual bool vaListPassedInSSARegister() = 0;
108
109 // The type of a va_list iterator object
110 virtual Type *vaListType(LLVMContext &Ctx) = 0;
111
112 // The type of a va_list as a function argument as lowered by C
113 virtual Type *vaListParameterType(Module &M) = 0;
114
115 // Initialize an allocated va_list object to point to an already
116 // initialized contiguous memory region.
117 // Return the value to pass as the va_list argument
118 virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
119 IRBuilder<> &Builder, AllocaInst *VaList,
120 Value *Buffer) = 0;
121
122 struct VAArgSlotInfo {
123 Align DataAlign; // With respect to the call frame
124 bool Indirect; // Passed via a pointer
125 };
126 virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
127
128 // Targets implemented so far all have the same trivial lowering for these
129 bool vaEndIsNop() { return true; }
130 bool vaCopyIsMemcpy() { return true; }
131
132 virtual ~VariadicABIInfo() = default;
133 };
134
135 // Module implements getFunction() which returns nullptr on missing declaration
136 // and getOrInsertFunction which creates one when absent. Intrinsics.h only
137 // implements getDeclaration which creates one when missing. Checking whether
138 // an intrinsic exists thus inserts it in the module and it then needs to be
139 // deleted again to clean up.
140 // The right name for the two functions on intrinsics would match Module::,
141 // but doing that in a single change would introduce nullptr dereferences
142 // where currently there are none. The minimal collateral damage approach
143 // would split the change over a release to help downstream branches. As it
144 // is unclear what approach will be preferred, implementing the trivial
145 // function here in the meantime to decouple from that discussion.
146 Function *getPreexistingDeclaration(Module *M, Intrinsic::ID Id,
147 ArrayRef<Type *> Tys = {}) {
148 auto *FT = Intrinsic::getType(M->getContext(), Id, Tys);
149 return M->getFunction(Tys.empty() ? Intrinsic::getName(Id)
150 : Intrinsic::getName(Id, Tys, M, FT));
151 }
152
153 class ExpandVariadics : public ModulePass {
154
155 // The pass construction sets the default to optimize when called from the middle
156 // end and to lowering when called from the backend. The command line variable
157 // overrides that. This is useful for testing and debugging. It also allows
158 // building an application with variadic functions wholly removed if one
159 // has sufficient control over the dependencies, e.g. a statically linked
160 // clang that has no variadic function calls remaining in the binary.
161
162 public:
163 static char ID;
164 const ExpandVariadicsMode Mode;
165 std::unique_ptr<VariadicABIInfo> ABI;
166
167 ExpandVariadics(ExpandVariadicsMode Mode)
168 : ModulePass(ID),
169 Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}
170
171 StringRef getPassName() const override { return "Expand variadic functions"; }
172
173 bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
174
175 bool runOnModule(Module &M) override;
176
177 bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
178
179 Function *replaceAllUsesWithNewDeclaration(Module &M,
180 Function *OriginalFunction);
181
182 Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
183 Function *OriginalFunction);
184
185 Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
186 Function *VariadicWrapper,
187 Function *FixedArityReplacement);
188
189 bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
190 Function *NF);
191
192 // The intrinsic functions va_copy and va_end are removed unconditionally.
193 // They correspond to a memcpy and a no-op on all implemented targets.
194 // The va_start intrinsic is removed from basic blocks that were not created
195 // by this pass; some may remain if needed to maintain the external ABI.
196
197 template <Intrinsic::ID ID, typename InstructionType>
198 bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
199 PointerType *IntrinsicArgType) {
200 bool Changed = false;
201 const DataLayout &DL = M.getDataLayout();
202 if (Function *Intrinsic =
203 getPreexistingDeclaration(&M, ID, {IntrinsicArgType})) {
204 for (User *U : make_early_inc_range(Intrinsic->users()))
205 if (auto *I = dyn_cast<InstructionType>(U))
206 Changed |= expandVAIntrinsicCall(Builder, DL, I);
207
208 if (Intrinsic->use_empty())
209 Intrinsic->eraseFromParent();
210 }
211 return Changed;
212 }
213
214 bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
215 unsigned Addrspace) {
216 auto &Ctx = M.getContext();
217 PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
218 bool Changed = false;
219
220 // expand vastart before vacopy as vastart may introduce a vacopy
221 Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
222 M, Builder, IntrinsicArgType);
223 Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
224 M, Builder, IntrinsicArgType);
225 Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
226 M, Builder, IntrinsicArgType);
227 return Changed;
228 }
229
230 bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
231 VAStartInst *Inst);
232
233 bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
234 VAEndInst *Inst);
235
236 bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
237 VACopyInst *Inst);
238
239 FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
240 // The type of "FTy" with the ... removed and a va_list appended
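// For example, with the pointer-based va_list used by the targets in this
// file, i32 (ptr, ...) becomes i32 (ptr, ptr).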
241 SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
242 ArgTypes.push_back(ABI->vaListParameterType(M));
243 return FunctionType::get(FTy->getReturnType(), ArgTypes,
244 /*IsVarArgs=*/false);
245 }
246
247 static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
248 AllocaInst *Alloced) {
249 std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
250 uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
251 return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
252 }
253
254 bool expansionApplicableToFunction(Module &M, Function *F) {
255 if (F->isIntrinsic() || !F->isVarArg() ||
256 F->hasFnAttribute(Attribute::Naked))
257 return false;
258
259 if (F->getCallingConv() != CallingConv::C)
260 return false;
261
262 if (rewriteABI())
263 return true;
264
265 if (!F->hasExactDefinition())
266 return false;
267
268 return true;
269 }
270
271 bool expansionApplicableToFunctionCall(CallBase *CB) {
272 if (CallInst *CI = dyn_cast<CallInst>(CB)) {
273 if (CI->isMustTailCall()) {
274 // Cannot expand musttail calls
275 return false;
276 }
277
278 if (CI->getCallingConv() != CallingConv::C)
279 return false;
280
281 return true;
282 }
283
284 if (isa<InvokeInst>(CB)) {
285 // Invoke not implemented in initial implementation of pass
286 return false;
287 }
288
289 // Other unimplemented derivative of CallBase
290 return false;
291 }
292
293 class ExpandedCallFrame {
294 // Helper for constructing an alloca instance containing the arguments bound
295 // to the variadic ... parameter, rearranged to allow indexing through a
296 // va_list iterator
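// For example, assuming slot alignments of 4 for i32 and 8 for double (the
// actual values come from the target's slotInfo), variadic arguments
// (i32 1, double 2.0) yield the packed struct { i32, [4 x i8], double },
// where the byte array is explicit padding.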
297 enum { N = 4 };
298 SmallVector<Type *, N> FieldTypes;
299 enum Tag { Store, Memcpy, Padding };
300 SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
301
302 template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
303 FieldTypes.push_back(FieldType);
304 Source.push_back({V, Bytes, tag});
305 }
306
307 public:
308 void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
309
310 void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
311 append<Memcpy>(T, V, Bytes);
312 }
313
314 void padding(LLVMContext &Ctx, uint64_t By) {
315 append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
316 }
317
318 size_t size() const { return FieldTypes.size(); }
319 bool empty() const { return FieldTypes.empty(); }
320
321 StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
322 const bool IsPacked = true;
323 return StructType::create(Ctx, FieldTypes,
324 (Twine(Name) + ".vararg").str(), IsPacked);
325 }
326
327 void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
328 AllocaInst *Alloced) {
329
330 StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
331
332 for (size_t I = 0; I < size(); I++) {
333
334 auto [V, bytes, tag] = Source[I];
335
336 if (tag == Padding) {
337 assert(V == nullptr);
338 continue;
339 }
340
341 auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
342
343 assert(V != nullptr);
344
345 if (tag == Store)
346 Builder.CreateStore(V, Dst);
347
348 if (tag == Memcpy)
349 Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
350 }
351 }
352 };
353 };
354
355 bool ExpandVariadics::runOnModule(Module &M) {
356 bool Changed = false;
357 if (Mode == ExpandVariadicsMode::Disable)
358 return Changed;
359
360 Triple TT(M.getTargetTriple());
361 ABI = VariadicABIInfo::create(TT);
362 if (!ABI)
363 return Changed;
364
365 if (!ABI->enableForTarget())
366 return Changed;
367
368 auto &Ctx = M.getContext();
369 const DataLayout &DL = M.getDataLayout();
370 IRBuilder<> Builder(Ctx);
371
372 // Lowering needs to run on all functions exactly once.
373 // Optimize could run on functions containing va_start exactly once.
374 for (Function &F : make_early_inc_range(M))
375 Changed |= runOnFunction(M, Builder, &F);
376
377 // After runOnFunction, all known calls to known variadic functions have been
378 // replaced. va_start intrinsics are presently (and invalidly!) only present
379 // in functions that used to be variadic and have now been replaced to take a
380 // va_list instead. If lowering as opposed to optimising, calls to unknown
381 // variadic functions have also been replaced.
382
383 {
384 // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
385 unsigned Addrspace = 0;
386 Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
387
388 Addrspace = DL.getAllocaAddrSpace();
389 if (Addrspace != 0)
390 Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
391 }
392
393 if (Mode != ExpandVariadicsMode::Lowering)
394 return Changed;
395
396 for (Function &F : make_early_inc_range(M)) {
397 if (F.isDeclaration())
398 continue;
399
400 // Now need to track down indirect calls. Can't find those
401 // by walking uses of variadic functions; we need to crawl the instruction
402 // stream. Fortunately this is only necessary for the ABI rewrite case.
403 for (BasicBlock &BB : F) {
404 for (Instruction &I : make_early_inc_range(BB)) {
405 if (CallBase *CB = dyn_cast<CallBase>(&I)) {
406 if (CB->isIndirectCall()) {
407 FunctionType *FTy = CB->getFunctionType();
408 if (FTy->isVarArg())
409 Changed |= expandCall(M, Builder, CB, FTy, 0);
410 }
411 }
412 }
413 }
414 }
415
416 return Changed;
417 }
418
419 bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
420 Function *OriginalFunction) {
421 bool Changed = false;
422
423 if (!expansionApplicableToFunction(M, OriginalFunction))
424 return Changed;
425
426 [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
427 OriginalFunction->isDeclaration();
428 assert(rewriteABI() || !OriginalFunctionIsDeclaration);
429
430 // Declare a new function and redirect every use to that new function
431 Function *VariadicWrapper =
432 replaceAllUsesWithNewDeclaration(M, OriginalFunction);
433 assert(VariadicWrapper->isDeclaration());
434 assert(OriginalFunction->use_empty());
435
436 // Create a new function taking va_list containing the implementation of the
437 // original
438 Function *FixedArityReplacement =
439 deriveFixedArityReplacement(M, Builder, OriginalFunction);
440 assert(OriginalFunction->isDeclaration());
441 assert(FixedArityReplacement->isDeclaration() ==
442 OriginalFunctionIsDeclaration);
443 assert(VariadicWrapper->isDeclaration());
444
445 // Create a single block forwarding wrapper that turns a ... into a va_list
446 [[maybe_unused]] Function *VariadicWrapperDefine =
447 defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
448 assert(VariadicWrapperDefine == VariadicWrapper);
449 assert(!VariadicWrapper->isDeclaration());
450
451 // We now have:
452 // 1. the original function, now as a declaration with no uses
453 // 2. a variadic function that unconditionally calls a fixed arity replacement
454 // 3. a fixed arity function equivalent to the original function
455
456 // Replace known calls to the variadic with calls to the va_list equivalent
457 for (User *U : make_early_inc_range(VariadicWrapper->users())) {
458 if (CallBase *CB = dyn_cast<CallBase>(U)) {
459 Value *CalledOperand = CB->getCalledOperand();
460 if (VariadicWrapper == CalledOperand)
461 Changed |=
462 expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
463 FixedArityReplacement);
464 }
465 }
466
467 // The original function will be erased.
468 // One of the two new functions will become a replacement for the original.
469 // When preserving the ABI, the other is an internal implementation detail.
470 // When rewriting the ABI, RAUW then delete the variadic one.
471 Function *const ExternallyAccessible =
472 rewriteABI() ? FixedArityReplacement : VariadicWrapper;
473 Function *const InternalOnly =
474 rewriteABI() ? VariadicWrapper : FixedArityReplacement;
475
476 // The external function is the replacement for the original
477 ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
478 ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
479 ExternallyAccessible->setComdat(OriginalFunction->getComdat());
480 ExternallyAccessible->takeName(OriginalFunction);
481
482 // Annotate the internal one as internal
483 InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
484 InternalOnly->setLinkage(GlobalValue::InternalLinkage);
485
486 // The original is unused and obsolete
487 OriginalFunction->eraseFromParent();
488
489 InternalOnly->removeDeadConstantUsers();
490
491 if (rewriteABI()) {
492 // All known calls to the function have been removed by expandCall
493 // Resolve everything else by replaceAllUsesWith
494 VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
495 VariadicWrapper->eraseFromParent();
496 }
497
498 return Changed;
499 }
500
501 Function *
502 ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
503 Function *OriginalFunction) {
504 auto &Ctx = M.getContext();
505 Function &F = *OriginalFunction;
506 FunctionType *FTy = F.getFunctionType();
507 Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
508
509 NF->setName(F.getName() + ".varargs");
510 NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
511
512 F.getParent()->getFunctionList().insert(F.getIterator(), NF);
513
514 AttrBuilder ParamAttrs(Ctx);
515 AttributeList Attrs = NF->getAttributes();
516 Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
517 NF->setAttributes(Attrs);
518
519 OriginalFunction->replaceAllUsesWith(NF);
520 return NF;
521 }
522
523 Function *
524 ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
525 Function *OriginalFunction) {
526 Function &F = *OriginalFunction;
527 // The purpose here is to split the variadic function F into two functions.
528 // One is a variadic function that bundles the passed arguments into a va_list
529 // and passes it to the second function. The second function does whatever
530 // the original F does, except that it takes a va_list instead of the ...
531
532 assert(expansionApplicableToFunction(M, &F));
533
534 auto &Ctx = M.getContext();
535
536 // Returned value isDeclaration() is equal to F.isDeclaration()
537 // but that property is not invariant throughout this function
538 const bool FunctionIsDefinition = !F.isDeclaration();
539
540 FunctionType *FTy = F.getFunctionType();
541 SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
542 ArgTypes.push_back(ABI->vaListParameterType(M));
543
544 FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
545 Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
546
547 // Note - same attribute handling as DeadArgumentElimination
548 NF->copyAttributesFrom(&F);
549 NF->setComdat(F.getComdat());
550 F.getParent()->getFunctionList().insert(F.getIterator(), NF);
551 NF->setName(F.getName() + ".valist");
552 NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
553
554 AttrBuilder ParamAttrs(Ctx);
555
556 AttributeList Attrs = NF->getAttributes();
557 Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
558 NF->setAttributes(Attrs);
559
560 // Splice the implementation into the new function with minimal changes
561 if (FunctionIsDefinition) {
562 NF->splice(NF->begin(), &F);
563
564 auto NewArg = NF->arg_begin();
565 for (Argument &Arg : F.args()) {
566 Arg.replaceAllUsesWith(NewArg);
567 NewArg->setName(Arg.getName()); // takeName without killing the old one
568 ++NewArg;
569 }
570 NewArg->setName("varargs");
571 }
572
573 SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
574 F.getAllMetadata(MDs);
575 for (auto [KindID, Node] : MDs)
576 NF->addMetadata(KindID, *Node);
577 F.clearMetadata();
578
579 return NF;
580 }
581
582 Function *
583 ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
584 Function *VariadicWrapper,
585 Function *FixedArityReplacement) {
586 auto &Ctx = Builder.getContext();
587 const DataLayout &DL = M.getDataLayout();
588 assert(VariadicWrapper->isDeclaration());
589 Function &F = *VariadicWrapper;
590
591 assert(F.isDeclaration());
592 Type *VaListTy = ABI->vaListType(Ctx);
593
594 auto *BB = BasicBlock::Create(Ctx, "entry", &F);
595 Builder.SetInsertPoint(BB);
596
597 AllocaInst *VaListInstance =
598 Builder.CreateAlloca(VaListTy, nullptr, "va_start");
599
600 Builder.CreateLifetimeStart(VaListInstance,
601 sizeOfAlloca(Ctx, DL, VaListInstance));
602
603 Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
604 {VaListInstance});
605
606 SmallVector<Value *> Args;
607 for (Argument &A : F.args())
608 Args.push_back(&A);
609
610 Type *ParameterType = ABI->vaListParameterType(M);
611 if (ABI->vaListPassedInSSARegister())
612 Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
613 else
614 Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));
615
616 CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
617
618 Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
619 {VaListInstance});
620 Builder.CreateLifetimeEnd(VaListInstance,
621 sizeOfAlloca(Ctx, DL, VaListInstance));
622
623 if (Result->getType()->isVoidTy())
624 Builder.CreateRetVoid();
625 else
626 Builder.CreateRet(Result);
627
628 return VariadicWrapper;
629 }
630
631 bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
632 FunctionType *VarargFunctionType,
633 Function *NF) {
634 bool Changed = false;
635 const DataLayout &DL = M.getDataLayout();
636
637 if (!expansionApplicableToFunctionCall(CB)) {
638 if (rewriteABI())
639 report_fatal_error("Cannot lower callbase instruction");
640 return Changed;
641 }
642
643 // This is tricky. The call instruction's function type might not match
644 // the type of the callee. When optimising, we can leave it unchanged.
645 // WebAssembly detects that inconsistency and repairs it.
646 FunctionType *FuncType = CB->getFunctionType();
647 if (FuncType != VarargFunctionType) {
648 if (!rewriteABI())
649 return Changed;
650 FuncType = VarargFunctionType;
651 }
652
653 auto &Ctx = CB->getContext();
654
655 Align MaxFieldAlign(1);
656
657 // The strategy is to allocate a call frame containing the variadic
658 // arguments laid out such that a target specific va_list can be initialized
659 // with it, such that target specific va_arg instructions will correctly
660 // iterate over it. This means getting the alignment right and sometimes
661 // embedding a pointer to the value instead of embedding the value itself.
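// Roughly, for a known callee, a call such as f(fmt, 1, 2.0) becomes
// (illustrative pseudo-IR, not the exact output):
//   %vararg_buffer = alloca %f.vararg   ; frame struct built below
//   ; store/memcpy the variadic arguments into %vararg_buffer
//   %va = va_list initialized from %vararg_buffer via initializeVaList
//   call @f.valist(%fmt, %va)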
662
663 Function *CBF = CB->getParent()->getParent();
664
665 ExpandedCallFrame Frame;
666
667 uint64_t CurrentOffset = 0;
668
669 for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
670 Value *ArgVal = CB->getArgOperand(I);
671 const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
672 const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);
673
674 // The type of the value being passed, decoded from byval/byref metadata if
675 // required
676 Type *const UnderlyingType = IsByVal ? CB->getParamByValType(I)
677 : IsByRef ? CB->getParamByRefType(I)
678 : ArgVal->getType();
679 const uint64_t UnderlyingSize =
680 DL.getTypeAllocSize(UnderlyingType).getFixedValue();
681
682 // The type to be written into the call frame
683 Type *FrameFieldType = UnderlyingType;
684
685 // The value to copy from when initialising the frame alloca
686 Value *SourceValue = ArgVal;
687
688 VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);
689
690 if (SlotInfo.Indirect) {
691 // The va_arg lowering loads through a pointer. Set up an alloca to aim
692 // that pointer at.
693 Builder.SetInsertPointPastAllocas(CBF);
694 Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
695 Value *CallerCopy =
696 Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
697
698 Builder.SetInsertPoint(CB);
699 if (IsByVal)
700 Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
701 else
702 Builder.CreateStore(ArgVal, CallerCopy);
703
704 // Indirection now handled, pass the alloca ptr by value
705 FrameFieldType = DL.getAllocaPtrType(Ctx);
706 SourceValue = CallerCopy;
707 }
708
709 // Alignment of the value within the frame
710 // This probably needs to be controllable as a function of type
711 Align DataAlign = SlotInfo.DataAlign;
712
713 MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
714
715 uint64_t DataAlignV = DataAlign.value();
716 if (uint64_t Rem = CurrentOffset % DataAlignV) {
717 // Inject explicit padding to deal with alignment requirements
718 uint64_t Padding = DataAlignV - Rem;
719 Frame.padding(Ctx, Padding);
720 CurrentOffset += Padding;
721 }
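// For example, with CurrentOffset == 4 and an 8 byte aligned slot, Rem is 4,
// so 4 bytes of padding are emitted and the next value lands at offset 8.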
722
723 if (SlotInfo.Indirect) {
724 Frame.store(Ctx, FrameFieldType, SourceValue);
725 } else {
726 if (IsByVal)
727 Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
728 else
729 Frame.store(Ctx, FrameFieldType, SourceValue);
730 }
731
732 CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
733 }
734
735 if (Frame.empty()) {
736 // Not passing any arguments, hopefully va_arg won't try to read any.
737 // Create a single byte frame containing nothing to point the va_list
738 // instance at, as that is less special-casey in the compiler and probably
739 // easier to interpret in a debugger.
740 Frame.padding(Ctx, 1);
741 }
742
743 StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
744
745 // The struct instance needs to be aligned to at least MaxFieldAlign for the
746 // alignment of the fields to be correct at runtime. Use the native stack
747 // alignment instead if that is greater, as that tends to give better codegen.
748 // This is an awkward way to guess whether there is a known stack alignment
749 // without hitting an assert in DL.getStackAlignment; 1024 is an arbitrary
750 // number likely to be greater than the natural stack alignment.
751 // TODO: DL.getStackAlignment could return a MaybeAlign instead of assert
752 Align AllocaAlign = MaxFieldAlign;
753 if (DL.exceedsNaturalStackAlignment(Align(1024)))
754 AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
755
756 // Put the alloca to hold the variadic args in the entry basic block.
757 Builder.SetInsertPointPastAllocas(CBF);
758
759 // SetCurrentDebugLocation when the builder SetInsertPoint method does not
760 Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
761
762 // The awkward construction here is to set the alignment on the instance
763 AllocaInst *Alloced = Builder.Insert(
764 new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
765 "vararg_buffer");
766 Changed = true;
767 assert(Alloced->getAllocatedType() == VarargsTy);
768
769 // Initialize the fields in the struct
770 Builder.SetInsertPoint(CB);
771 Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
772 Frame.initializeStructAlloca(DL, Builder, Alloced);
773
774 const unsigned NumArgs = FuncType->getNumParams();
775 SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);
776
777 // Initialize a va_list pointing to that struct and pass it as the last
778 // argument
779 AllocaInst *VaList = nullptr;
780 {
781 if (!ABI->vaListPassedInSSARegister()) {
782 Type *VaListTy = ABI->vaListType(Ctx);
783 Builder.SetInsertPointPastAllocas(CBF);
784 Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
785 VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
786 Builder.SetInsertPoint(CB);
787 Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
788 }
789 Builder.SetInsertPoint(CB);
790 Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
791 }
792
793 // Attributes excluding any on the vararg arguments
794 AttributeList PAL = CB->getAttributes();
795 if (!PAL.isEmpty()) {
796 SmallVector<AttributeSet, 8> ArgAttrs;
797 for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
798 ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
799 PAL =
800 AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
801 }
802
803 SmallVector<OperandBundleDef, 1> OpBundles;
804 CB->getOperandBundlesAsDefs(OpBundles);
805
806 CallBase *NewCB = nullptr;
807
808 if (CallInst *CI = dyn_cast<CallInst>(CB)) {
809 Value *Dst = NF ? NF : CI->getCalledOperand();
810 FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
811
812 NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
813
814 CallInst::TailCallKind TCK = CI->getTailCallKind();
815 assert(TCK != CallInst::TCK_MustTail);
816
817 // Can't tail call a function that is being passed a pointer to an alloca
818 if (TCK == CallInst::TCK_Tail)
819 TCK = CallInst::TCK_None;
820 CI->setTailCallKind(TCK);
821
822 } else {
823 llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
824 }
825
826 if (VaList)
827 Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
828
829 Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
830
831 NewCB->setAttributes(PAL);
832 NewCB->takeName(CB);
833 NewCB->setCallingConv(CB->getCallingConv());
834 NewCB->setDebugLoc(DebugLoc());
835
836 // DeadArgElim and ArgPromotion copy exactly this metadata
837 NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
838
839 CB->replaceAllUsesWith(NewCB);
840 CB->eraseFromParent();
841 return Changed;
842 }
843
844 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
845 const DataLayout &DL,
846 VAStartInst *Inst) {
847 // Only removing va_start instructions that are not in variadic functions.
848 // Those would be rejected by the IR verifier before this pass.
849 // After splicing basic blocks from a variadic function into a fixed arity
850 // one, the va_start calls that used to refer to the ... parameter still exist.
851 // There are also variadic functions that this pass did not change and
852 // va_start instances in the created single block wrapper functions.
853 // Replace exactly the instances in non-variadic functions as those are
854 // the ones to be fixed up to use the va_list passed as the final argument.
855
856 Function *ContainingFunction = Inst->getFunction();
857 if (ContainingFunction->isVarArg()) {
858 return false;
859 }
860
861 // The last argument is a vaListParameterType, either a va_list
862 // or a pointer to one depending on the target.
863 bool PassedByValue = ABI->vaListPassedInSSARegister();
864 Argument *PassedVaList =
865 ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
866
867 // va_start takes a pointer to a va_list, e.g. one on the stack
868 Value *VaStartArg = Inst->getArgList();
869
870 Builder.SetInsertPoint(Inst);
871
872 if (PassedByValue) {
873 // The general thing to do is create an alloca, store the va_list argument
874 // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to a
875 // store to the VaStartArg.
876 assert(ABI->vaCopyIsMemcpy());
877 Builder.CreateStore(PassedVaList, VaStartArg);
878 } else {
879
880 // Otherwise emit a vacopy to pick up target-specific handling if any
881 auto &Ctx = Builder.getContext();
882
883 Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
884 {VaStartArg, PassedVaList});
885 }
886
887 Inst->eraseFromParent();
888 return true;
889 }
890
891 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
892 VAEndInst *Inst) {
893 assert(ABI->vaEndIsNop());
894 Inst->eraseFromParent();
895 return true;
896 }
897
898 bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
899 const DataLayout &DL,
900 VACopyInst *Inst) {
901 assert(ABI->vaCopyIsMemcpy());
902 Builder.SetInsertPoint(Inst);
903
904 auto &Ctx = Builder.getContext();
905 Type *VaListTy = ABI->vaListType(Ctx);
906 uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
907
908 Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
909 Builder.getInt32(Size));
910
911 Inst->eraseFromParent();
912 return true;
913 }
914
915 struct Amdgpu final : public VariadicABIInfo {
916
917 bool enableForTarget() override { return true; }
918
919 bool vaListPassedInSSARegister() override { return true; }
920
921 Type *vaListType(LLVMContext &Ctx) override {
922 return PointerType::getUnqual(Ctx);
923 }
924
925 Type *vaListParameterType(Module &M) override {
926 return PointerType::getUnqual(M.getContext());
927 }
928
929 Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
930 AllocaInst * /*va_list*/, Value *Buffer) override {
931 // Given Buffer, which is an AllocaInst of vararg_buffer,
932 // return something usable as the va_list parameter type
933 return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
934 }
935
936 VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
937 return {Align(4), false};
938 }
939 };
940
941 struct NVPTX final : public VariadicABIInfo {
942
943 bool enableForTarget() override { return true; }
944
945 bool vaListPassedInSSARegister() override { return true; }
946
947 Type *vaListType(LLVMContext &Ctx) override {
948 return PointerType::getUnqual(Ctx);
949 }
950
951 Type *vaListParameterType(Module &M) override {
952 return PointerType::getUnqual(M.getContext());
953 }
954
955 Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
956 AllocaInst *, Value *Buffer) override {
957 return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
958 }
959
960 VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
961 // NVPTX expects natural alignment in all cases. The variadic call ABI will
962 // handle promoting types to their appropriate size and alignment.
963 Align A = DL.getABITypeAlign(Parameter);
964 return {A, false};
965 }
966 };
967
968 struct Wasm final : public VariadicABIInfo {
969
970 bool enableForTarget() override {
971 // Currently wasm is only used for testing.
972 return commandLineOverride();
973 }
974
975 bool vaListPassedInSSARegister() override { return true; }
976
977 Type *vaListType(LLVMContext &Ctx) override {
978 return PointerType::getUnqual(Ctx);
979 }
980
981 Type *vaListParameterType(Module &M) override {
982 return PointerType::getUnqual(M.getContext());
983 }
984
985 Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
986 AllocaInst * /*va_list*/, Value *Buffer) override {
987 return Buffer;
988 }
989
990 VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
991 LLVMContext &Ctx = Parameter->getContext();
992 const unsigned MinAlign = 4;
993 Align A = DL.getABITypeAlign(Parameter);
994 if (A < MinAlign)
995 A = Align(MinAlign);
996
997 if (auto *S = dyn_cast<StructType>(Parameter)) {
998 if (S->getNumElements() > 1) {
999 return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
1000 }
1001 }
1002
1003 return {A, false};
1004 }
1005 };
1006
1007 std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
1008 switch (T.getArch()) {
1009 case Triple::r600:
1010 case Triple::amdgcn: {
1011 return std::make_unique<Amdgpu>();
1012 }
1013
1014 case Triple::wasm32: {
1015 return std::make_unique<Wasm>();
1016 }
1017
1018 case Triple::nvptx:
1019 case Triple::nvptx64: {
1020 return std::make_unique<NVPTX>();
1021 }
1022
1023 default:
1024 return {};
1025 }
1026 }
1027
1028 } // namespace
1029
1030 char ExpandVariadics::ID = 0;
1031
1032 INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
1033 false)
1034
1035 ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
1036 return new ExpandVariadics(M);
1037 }
1038
1039 PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
1040 return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
1041 : PreservedAnalyses::all();
1042 }
1043
1044 ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
1045