xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (revision a3266ba2697a383d2ede56803320d941866c7e76)
1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64CallLowering.h"
16 #include "AArch64ISelLowering.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/Analysis.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/LowLevelType.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/TargetRegisterInfo.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/CodeGen/ValueTypes.h"
36 #include "llvm/IR/Argument.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Support/MachineValueType.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstdint>
45 #include <iterator>
46 
47 #define DEBUG_TYPE "aarch64-call-lowering"
48 
49 using namespace llvm;
50 
51 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
52   : CallLowering(&TLI) {}
53 
54 namespace {
55 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
56   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
57                      CCAssignFn *AssignFn)
58       : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
59 
60   Register getStackAddress(uint64_t Size, int64_t Offset,
61                            MachinePointerInfo &MPO) override {
62     auto &MFI = MIRBuilder.getMF().getFrameInfo();
63     int FI = MFI.CreateFixedObject(Size, Offset, true);
64     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
65     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
66     StackUsed = std::max(StackUsed, Size + Offset);
67     return AddrReg.getReg(0);
68   }
69 
70   void assignValueToReg(Register ValVReg, Register PhysReg,
71                         CCValAssign &VA) override {
72     markPhysRegUsed(PhysReg);
73     switch (VA.getLocInfo()) {
74     default:
75       MIRBuilder.buildCopy(ValVReg, PhysReg);
76       break;
77     case CCValAssign::LocInfo::SExt:
78     case CCValAssign::LocInfo::ZExt:
79     case CCValAssign::LocInfo::AExt: {
80       auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
81       MIRBuilder.buildTrunc(ValVReg, Copy);
82       break;
83     }
84     }
85   }
86 
87   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
88                             MachinePointerInfo &MPO, CCValAssign &VA) override {
89     MachineFunction &MF = MIRBuilder.getMF();
90 
91     // The reported memory location may be wider than the value.
92     const LLT RegTy = MRI.getType(ValVReg);
93     MemSize = std::min(static_cast<uint64_t>(RegTy.getSizeInBytes()), MemSize);
94 
95     auto MMO = MF.getMachineMemOperand(
96         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemSize,
97         inferAlignFromPtrInfo(MF, MPO));
98     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
99   }
100 
101   /// How the physical register gets marked varies between formal
102   /// parameters (it's a basic-block live-in), and a call instruction
103   /// (it's an implicit-def of the BL).
104   virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
105 
106   uint64_t StackUsed;
107 };
108 
109 struct FormalArgHandler : public IncomingArgHandler {
110   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
111                    CCAssignFn *AssignFn)
112     : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
113 
114   void markPhysRegUsed(MCRegister PhysReg) override {
115     MIRBuilder.getMRI()->addLiveIn(PhysReg);
116     MIRBuilder.getMBB().addLiveIn(PhysReg);
117   }
118 };
119 
120 struct CallReturnHandler : public IncomingArgHandler {
121   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
122                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
123     : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
124 
125   void markPhysRegUsed(MCRegister PhysReg) override {
126     MIB.addDef(PhysReg, RegState::Implicit);
127   }
128 
129   MachineInstrBuilder MIB;
130 };
131 
132 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
133   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
134                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
135                      CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
136                      int FPDiff = 0)
137       : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
138         AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
139         StackSize(0), SPReg(0) {}
140 
141   Register getStackAddress(uint64_t Size, int64_t Offset,
142                            MachinePointerInfo &MPO) override {
143     MachineFunction &MF = MIRBuilder.getMF();
144     LLT p0 = LLT::pointer(0, 64);
145     LLT s64 = LLT::scalar(64);
146 
147     if (IsTailCall) {
148       Offset += FPDiff;
149       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
150       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
151       MPO = MachinePointerInfo::getFixedStack(MF, FI);
152       return FIReg.getReg(0);
153     }
154 
155     if (!SPReg)
156       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
157 
158     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
159 
160     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
161 
162     MPO = MachinePointerInfo::getStack(MF, Offset);
163     return AddrReg.getReg(0);
164   }
165 
166   void assignValueToReg(Register ValVReg, Register PhysReg,
167                         CCValAssign &VA) override {
168     MIB.addUse(PhysReg, RegState::Implicit);
169     Register ExtReg = extendRegister(ValVReg, VA);
170     MIRBuilder.buildCopy(PhysReg, ExtReg);
171   }
172 
173   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
174                             MachinePointerInfo &MPO, CCValAssign &VA) override {
175     MachineFunction &MF = MIRBuilder.getMF();
176     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size,
177                                        inferAlignFromPtrInfo(MF, MPO));
178     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
179   }
180 
181   void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
182                             uint64_t Size, MachinePointerInfo &MPO,
183                             CCValAssign &VA) override {
184     unsigned MaxSize = Size * 8;
185     // For varargs, we always want to extend them to 8 bytes, in which case
186     // we disable setting a max.
187     if (!Arg.IsFixed)
188       MaxSize = 0;
189 
190     assert(Arg.Regs.size() == 1);
191 
192     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
193                            ? extendRegister(Arg.Regs[0], VA, MaxSize)
194                            : Arg.Regs[0];
195 
196     // If we extended we might need to adjust the MMO's Size.
197     const LLT RegTy = MRI.getType(ValVReg);
198     if (RegTy.getSizeInBytes() > Size)
199       Size = RegTy.getSizeInBytes();
200 
201     assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
202   }
203 
204   bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
205                  CCValAssign::LocInfo LocInfo,
206                  const CallLowering::ArgInfo &Info,
207                  ISD::ArgFlagsTy Flags,
208                  CCState &State) override {
209     bool Res;
210     if (Info.IsFixed)
211       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
212     else
213       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
214 
215     StackSize = State.getNextStackOffset();
216     return Res;
217   }
218 
219   MachineInstrBuilder MIB;
220   CCAssignFn *AssignFnVarArg;
221   bool IsTailCall;
222 
223   /// For tail calls, the byte offset of the call's argument area from the
224   /// callee's. Unused elsewhere.
225   int FPDiff;
226   uint64_t StackSize;
227 
228   // Cache the SP register vreg if we need it more than once in this call site.
229   Register SPReg;
230 };
231 } // namespace
232 
233 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
234   return CallConv == CallingConv::Fast && TailCallOpt;
235 }
236 
237 void AArch64CallLowering::splitToValueTypes(
238     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
239     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
240   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
241   LLVMContext &Ctx = OrigArg.Ty->getContext();
242 
243   SmallVector<EVT, 4> SplitVTs;
244   SmallVector<uint64_t, 4> Offsets;
245   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
246 
247   if (SplitVTs.size() == 0)
248     return;
249 
250   if (SplitVTs.size() == 1) {
251     // No splitting to do, but we want to replace the original type (e.g. [1 x
252     // double] -> double).
253     SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
254                            OrigArg.Flags[0], OrigArg.IsFixed);
255     return;
256   }
257 
258   // Create one ArgInfo for each virtual register in the original ArgInfo.
259   assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
260 
261   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
262       OrigArg.Ty, CallConv, false);
263   for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
264     Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
265     SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
266                            OrigArg.IsFixed);
267     if (NeedsRegBlock)
268       SplitArgs.back().Flags[0].setInConsecutiveRegs();
269   }
270 
271   SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
272 }
273 
274 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
275                                       const Value *Val,
276                                       ArrayRef<Register> VRegs,
277                                       FunctionLoweringInfo &FLI,
278                                       Register SwiftErrorVReg) const {
279   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
280   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
281          "Return value without a vreg");
282 
283   bool Success = true;
284   if (!VRegs.empty()) {
285     MachineFunction &MF = MIRBuilder.getMF();
286     const Function &F = MF.getFunction();
287 
288     MachineRegisterInfo &MRI = MF.getRegInfo();
289     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
290     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
291     auto &DL = F.getParent()->getDataLayout();
292     LLVMContext &Ctx = Val->getType()->getContext();
293 
294     SmallVector<EVT, 4> SplitEVTs;
295     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
296     assert(VRegs.size() == SplitEVTs.size() &&
297            "For each split Type there should be exactly one VReg.");
298 
299     SmallVector<ArgInfo, 8> SplitArgs;
300     CallingConv::ID CC = F.getCallingConv();
301 
302     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
303       if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
304         LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
305         return false;
306       }
307 
308       Register CurVReg = VRegs[i];
309       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
310       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
311 
312       // i1 is a special case because SDAG i1 true is naturally zero extended
313       // when widened using ANYEXT. We need to do it explicitly here.
314       if (MRI.getType(CurVReg).getSizeInBits() == 1) {
315         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
316       } else {
317         // Some types will need extending as specified by the CC.
318         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
319         if (EVT(NewVT) != SplitEVTs[i]) {
320           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
321           if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
322                                              Attribute::SExt))
323             ExtendOp = TargetOpcode::G_SEXT;
324           else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
325                                                   Attribute::ZExt))
326             ExtendOp = TargetOpcode::G_ZEXT;
327 
328           LLT NewLLT(NewVT);
329           LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
330           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
331           // Instead of an extend, we might have a vector type which needs
332           // padding with more elements, e.g. <2 x half> -> <4 x half>.
333           if (NewVT.isVector()) {
334             if (OldLLT.isVector()) {
335               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
336                 // We don't handle VA types which are not exactly twice the
337                 // size, but can easily be done in future.
338                 if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
339                   LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
340                   return false;
341                 }
342                 auto Undef = MIRBuilder.buildUndef({OldLLT});
343                 CurVReg =
344                     MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
345               } else {
346                 // Just do a vector extend.
347                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
348                               .getReg(0);
349               }
350             } else if (NewLLT.getNumElements() == 2) {
351               // We need to pad a <1 x S> type to <2 x S>. Since we don't have
352               // <1 x S> vector types in GISel we use a build_vector instead
353               // of a vector merge/concat.
354               auto Undef = MIRBuilder.buildUndef({OldLLT});
355               CurVReg =
356                   MIRBuilder
357                       .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
358                       .getReg(0);
359             } else {
360               LLVM_DEBUG(dbgs() << "Could not handle ret ty");
361               return false;
362             }
363           } else {
364             // A scalar extend.
365             CurVReg =
366                 MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
367           }
368         }
369       }
370       if (CurVReg != CurArgInfo.Regs[0]) {
371         CurArgInfo.Regs[0] = CurVReg;
372         // Reset the arg flags after modifying CurVReg.
373         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
374       }
375      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
376     }
377 
378     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
379     Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
380   }
381 
382   if (SwiftErrorVReg) {
383     MIB.addUse(AArch64::X21, RegState::Implicit);
384     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
385   }
386 
387   MIRBuilder.insertInstr(MIB);
388   return Success;
389 }
390 
391 /// Helper function to compute forwarded registers for musttail calls. Computes
392 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
393 /// can be used to save + restore registers later.
394 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
395                                              CCAssignFn *AssignFn) {
396   MachineBasicBlock &MBB = MIRBuilder.getMBB();
397   MachineFunction &MF = MIRBuilder.getMF();
398   MachineFrameInfo &MFI = MF.getFrameInfo();
399 
400   if (!MFI.hasMustTailInVarArgFunc())
401     return;
402 
403   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
404   const Function &F = MF.getFunction();
405   assert(F.isVarArg() && "Expected F to be vararg?");
406 
407   // Compute the set of forwarded registers. The rest are scratch.
408   SmallVector<CCValAssign, 16> ArgLocs;
409   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
410                  F.getContext());
411   SmallVector<MVT, 2> RegParmTypes;
412   RegParmTypes.push_back(MVT::i64);
413   RegParmTypes.push_back(MVT::f128);
414 
415   // Later on, we can use this vector to restore the registers if necessary.
416   SmallVectorImpl<ForwardedRegister> &Forwards =
417       FuncInfo->getForwardedMustTailRegParms();
418   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
419 
420   // Conservatively forward X8, since it might be used for an aggregate
421   // return.
422   if (!CCInfo.isAllocated(AArch64::X8)) {
423     Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
424     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
425   }
426 
427   // Add the forwards to the MachineBasicBlock and MachineFunction.
428   for (const auto &F : Forwards) {
429     MBB.addLiveIn(F.PReg);
430     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
431   }
432 }
433 
434 bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const {
435   if (isa<ScalableVectorType>(F.getReturnType()))
436     return true;
437   return llvm::any_of(F.args(), [](const Argument &A) {
438     return isa<ScalableVectorType>(A.getType());
439   });
440 }
441 
442 bool AArch64CallLowering::lowerFormalArguments(
443     MachineIRBuilder &MIRBuilder, const Function &F,
444     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
445   MachineFunction &MF = MIRBuilder.getMF();
446   MachineBasicBlock &MBB = MIRBuilder.getMBB();
447   MachineRegisterInfo &MRI = MF.getRegInfo();
448   auto &DL = F.getParent()->getDataLayout();
449 
450   SmallVector<ArgInfo, 8> SplitArgs;
451   unsigned i = 0;
452   for (auto &Arg : F.args()) {
453     if (DL.getTypeStoreSize(Arg.getType()).isZero())
454       continue;
455 
456     ArgInfo OrigArg{VRegs[i], Arg.getType()};
457     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
458 
459     splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
460     ++i;
461   }
462 
463   if (!MBB.empty())
464     MIRBuilder.setInstr(*MBB.begin());
465 
466   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
467   CCAssignFn *AssignFn =
468       TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
469 
470   FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
471   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
472     return false;
473 
474   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
475   uint64_t StackOffset = Handler.StackUsed;
476   if (F.isVarArg()) {
477     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
478     if (!Subtarget.isTargetDarwin()) {
479         // FIXME: we need to reimplement saveVarArgsRegisters from
480       // AArch64ISelLowering.
481       return false;
482     }
483 
484     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
485     StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
486 
487     auto &MFI = MIRBuilder.getMF().getFrameInfo();
488     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
489   }
490 
491   if (doesCalleeRestoreStack(F.getCallingConv(),
492                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
493     // We have a non-standard ABI, so why not make full use of the stack that
494     // we're going to pop? It must be aligned to 16 B in any case.
495     StackOffset = alignTo(StackOffset, 16);
496 
497     // If we're expected to restore the stack (e.g. fastcc), then we'll be
498     // adding a multiple of 16.
499     FuncInfo->setArgumentStackToRestore(StackOffset);
500 
501     // Our own callers will guarantee that the space is free by giving an
502     // aligned value to CALLSEQ_START.
503   }
504 
505   // When we tail call, we need to check if the callee's arguments
506   // will fit on the caller's stack. So, whenever we lower formal arguments,
507   // we should keep track of this information, since we might lower a tail call
508   // in this function later.
509   FuncInfo->setBytesInStackArgArea(StackOffset);
510 
511   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
512   if (Subtarget.hasCustomCallingConv())
513     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
514 
515   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
516 
517   // Move back to the end of the basic block.
518   MIRBuilder.setMBB(MBB);
519 
520   return true;
521 }
522 
523 /// Return true if the calling convention is one that we can guarantee TCO for.
524 static bool canGuaranteeTCO(CallingConv::ID CC) {
525   return CC == CallingConv::Fast;
526 }
527 
528 /// Return true if we might ever do TCO for calls with this calling convention.
529 static bool mayTailCallThisCC(CallingConv::ID CC) {
530   switch (CC) {
531   case CallingConv::C:
532   case CallingConv::PreserveMost:
533   case CallingConv::Swift:
534     return true;
535   default:
536     return canGuaranteeTCO(CC);
537   }
538 }
539 
540 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
541 /// CC.
542 static std::pair<CCAssignFn *, CCAssignFn *>
543 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
544   return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
545 }
546 
547 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
548     CallLoweringInfo &Info, MachineFunction &MF,
549     SmallVectorImpl<ArgInfo> &InArgs) const {
550   const Function &CallerF = MF.getFunction();
551   CallingConv::ID CalleeCC = Info.CallConv;
552   CallingConv::ID CallerCC = CallerF.getCallingConv();
553 
554   // If the calling conventions match, then everything must be the same.
555   if (CalleeCC == CallerCC)
556     return true;
557 
558   // Check if the caller and callee will handle arguments in the same way.
559   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
560   CCAssignFn *CalleeAssignFnFixed;
561   CCAssignFn *CalleeAssignFnVarArg;
562   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
563       getAssignFnsForCC(CalleeCC, TLI);
564 
565   CCAssignFn *CallerAssignFnFixed;
566   CCAssignFn *CallerAssignFnVarArg;
567   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
568       getAssignFnsForCC(CallerCC, TLI);
569 
570   if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
571                          *CalleeAssignFnVarArg, *CallerAssignFnFixed,
572                          *CallerAssignFnVarArg))
573     return false;
574 
575   // Make sure that the caller and callee preserve all of the same registers.
576   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
577   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
578   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
579   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
580     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
581     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
582   }
583 
584   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
585 }
586 
587 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
588     CallLoweringInfo &Info, MachineFunction &MF,
589     SmallVectorImpl<ArgInfo> &OutArgs) const {
590   // If there are no outgoing arguments, then we are done.
591   if (OutArgs.empty())
592     return true;
593 
594   const Function &CallerF = MF.getFunction();
595   CallingConv::ID CalleeCC = Info.CallConv;
596   CallingConv::ID CallerCC = CallerF.getCallingConv();
597   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
598 
599   CCAssignFn *AssignFnFixed;
600   CCAssignFn *AssignFnVarArg;
601   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
602 
603   // We have outgoing arguments. Make sure that we can tail call with them.
604   SmallVector<CCValAssign, 16> OutLocs;
605   CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
606 
607   if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
608     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
609     return false;
610   }
611 
612   // Make sure that they can fit on the caller's stack.
613   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
614   if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
615     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
616     return false;
617   }
618 
619   // Verify that the parameters in callee-saved registers match.
620   // TODO: Port this over to CallLowering as general code once swiftself is
621   // supported.
622   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
623   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
624   MachineRegisterInfo &MRI = MF.getRegInfo();
625 
626   if (Info.IsVarArg) {
627     // Be conservative and disallow variadic memory operands to match SDAG's
628     // behaviour.
629     // FIXME: If the caller's calling convention is C, then we can
630     // potentially use its argument area. However, for cases like fastcc,
631     // we can't do anything.
632     for (unsigned i = 0; i < OutLocs.size(); ++i) {
633       auto &ArgLoc = OutLocs[i];
634       if (ArgLoc.isRegLoc())
635         continue;
636 
637       LLVM_DEBUG(
638           dbgs()
639           << "... Cannot tail call vararg function with stack arguments\n");
640       return false;
641     }
642   }
643 
644   return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
645 }
646 
647 bool AArch64CallLowering::isEligibleForTailCallOptimization(
648     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
649     SmallVectorImpl<ArgInfo> &InArgs,
650     SmallVectorImpl<ArgInfo> &OutArgs) const {
651 
652   // Must pass all target-independent checks in order to tail call optimize.
653   if (!Info.IsTailCall)
654     return false;
655 
656   CallingConv::ID CalleeCC = Info.CallConv;
657   MachineFunction &MF = MIRBuilder.getMF();
658   const Function &CallerF = MF.getFunction();
659 
660   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
661 
662   if (Info.SwiftErrorVReg) {
663     // TODO: We should handle this.
664     // Note that this is also handled by the check for no outgoing arguments.
665     // Proactively disabling this though, because the swifterror handling in
666     // lowerCall inserts a COPY *after* the location of the call.
667     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
668     return false;
669   }
670 
671   if (!mayTailCallThisCC(CalleeCC)) {
672     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
673     return false;
674   }
675 
676   // Byval parameters hand the function a pointer directly into the stack area
677   // we want to reuse during a tail call. Working around this *is* possible (see
678   // X86).
679   //
680   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
681   // it?
682   //
683   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
684   // In this case, it is necessary to save/restore X0 in the callee. Tail
685   // call opt interferes with this. So we disable tail call opt when the
686   // caller has an argument with "inreg" attribute.
687   //
688   // FIXME: Check whether the callee also has an "inreg" argument.
689   //
690   // When the caller has a swifterror argument, we don't want to tail call
691   // because would have to move into the swifterror register before the
692   // tail call.
693   if (any_of(CallerF.args(), [](const Argument &A) {
694         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
695       })) {
696     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
697                          "inreg, or swifterror arguments\n");
698     return false;
699   }
700 
701   // Externally-defined functions with weak linkage should not be
702   // tail-called on AArch64 when the OS does not support dynamic
703   // pre-emption of symbols, as the AAELF spec requires normal calls
704   // to undefined weak functions to be replaced with a NOP or jump to the
705   // next instruction. The behaviour of branch instructions in this
706   // situation (as used for tail calls) is implementation-defined, so we
707   // cannot rely on the linker replacing the tail call with a return.
708   if (Info.Callee.isGlobal()) {
709     const GlobalValue *GV = Info.Callee.getGlobal();
710     const Triple &TT = MF.getTarget().getTargetTriple();
711     if (GV->hasExternalWeakLinkage() &&
712         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
713          TT.isOSBinFormatMachO())) {
714       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
715                            "with weak linkage for this OS.\n");
716       return false;
717     }
718   }
719 
720   // If we have -tailcallopt, then we're done.
721   if (MF.getTarget().Options.GuaranteedTailCallOpt)
722     return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
723 
724   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
725   // Try to find cases where we can do that.
726 
727   // I want anyone implementing a new calling convention to think long and hard
728   // about this assert.
729   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
730          "Unexpected variadic calling convention");
731 
732   // Verify that the incoming and outgoing arguments from the callee are
733   // safe to tail call.
734   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
735     LLVM_DEBUG(
736         dbgs()
737         << "... Caller and callee have incompatible calling conventions.\n");
738     return false;
739   }
740 
741   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
742     return false;
743 
744   LLVM_DEBUG(
745       dbgs() << "... Call is eligible for tail call optimization.\n");
746   return true;
747 }
748 
749 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
750                               bool IsTailCall) {
751   if (!IsTailCall)
752     return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
753 
754   if (!IsIndirect)
755     return AArch64::TCRETURNdi;
756 
757   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
758   // x16 or x17.
759   if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
760     return AArch64::TCRETURNriBTI;
761 
762   return AArch64::TCRETURNri;
763 }
764 
765 bool AArch64CallLowering::lowerTailCall(
766     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
767     SmallVectorImpl<ArgInfo> &OutArgs) const {
768   MachineFunction &MF = MIRBuilder.getMF();
769   const Function &F = MF.getFunction();
770   MachineRegisterInfo &MRI = MF.getRegInfo();
771   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
772   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
773 
774   // True when we're tail calling, but without -tailcallopt.
775   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
776 
777   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
778   // register class. Until we can do that, we should fall back here.
779   if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
780     LLVM_DEBUG(
781         dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
782     return false;
783   }
784 
785   // Find out which ABI gets to decide where things go.
786   CallingConv::ID CalleeCC = Info.CallConv;
787   CCAssignFn *AssignFnFixed;
788   CCAssignFn *AssignFnVarArg;
789   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
790 
791   MachineInstrBuilder CallSeqStart;
792   if (!IsSibCall)
793     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
794 
795   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
796   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
797   MIB.add(Info.Callee);
798 
799   // Byte offset for the tail call. When we are sibcalling, this will always
800   // be 0.
801   MIB.addImm(0);
802 
803   // Tell the call which registers are clobbered.
804   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
805   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
806   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
807     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
808   MIB.addRegMask(Mask);
809 
810   if (TRI->isAnyArgRegReserved(MF))
811     TRI->emitReservedArgRegCallError(MF);
812 
813   // FPDiff is the byte offset of the call's argument area from the callee's.
814   // Stores to callee stack arguments will be placed in FixedStackSlots offset
815   // by this amount for a tail call. In a sibling call it must be 0 because the
816   // caller will deallocate the entire stack and the callee still expects its
817   // arguments to begin at SP+0.
818   int FPDiff = 0;
819 
820   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
821   // by -tailcallopt. For sibcalls, the memory operands for the call are
822   // already available in the caller's incoming argument space.
823   unsigned NumBytes = 0;
824   if (!IsSibCall) {
825     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
826     // before handling assignments, because FPDiff must be known for memory
827     // arguments.
828     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
829     SmallVector<CCValAssign, 16> OutLocs;
830     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
831     analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);
832 
833     // The callee will pop the argument stack as a tail call. Thus, we must
834     // keep it 16-byte aligned.
835     NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
836 
837     // FPDiff will be negative if this tail call requires more space than we
838     // would automatically have in our incoming argument space. Positive if we
839     // actually shrink the stack.
840     FPDiff = NumReusableBytes - NumBytes;
841 
842     // The stack pointer must be 16-byte aligned at all times it's used for a
843     // memory operation, which in practice means at *all* times and in
844     // particular across call boundaries. Therefore our own arguments started at
845     // a 16-byte aligned SP and the delta applied for the tail call should
846     // satisfy the same constraint.
847     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
848   }
849 
850   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
851 
852   // Do the actual argument marshalling.
853   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
854                              AssignFnVarArg, true, FPDiff);
855   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
856     return false;
857 
858   if (Info.IsVarArg && Info.IsMustTailCall) {
859     // Now we know what's being passed to the function. Add uses to the call for
860     // the forwarded registers that we *aren't* passing as parameters. This will
861     // preserve the copies we build earlier.
862     for (const auto &F : Forwards) {
863       Register ForwardedReg = F.PReg;
864       // If the register is already passed, or aliases a register which is
865       // already being passed, then skip it.
866       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
867             if (!Use.isReg())
868               return false;
869             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
870           }))
871         continue;
872 
873       // We aren't passing it already, so we should add it to the call.
874       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
875       MIB.addReg(ForwardedReg, RegState::Implicit);
876     }
877   }
878 
879   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
880   // sequence start and end here.
881   if (!IsSibCall) {
882     MIB->getOperand(1).setImm(FPDiff);
883     CallSeqStart.addImm(NumBytes).addImm(0);
884     // End the call sequence *before* emitting the call. Normally, we would
885     // tidy the frame up after the call. However, here, we've laid out the
886     // parameters so that when SP is reset, they will be in the correct
887     // location.
888     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
889   }
890 
891   // Now we can add the actual call instruction to the correct basic block.
892   MIRBuilder.insertInstr(MIB);
893 
894   // If Callee is a reg, since it is used by a target specific instruction,
895   // it must have a register class matching the constraint of that instruction.
896   if (Info.Callee.isReg())
897     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
898                              *MF.getSubtarget().getRegBankInfo(), *MIB,
899                              MIB->getDesc(), Info.Callee, 0);
900 
901   MF.getFrameInfo().setHasTailCall();
902   Info.LoweredTailCall = true;
903   return true;
904 }
905 
906 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
907                                     CallLoweringInfo &Info) const {
908   MachineFunction &MF = MIRBuilder.getMF();
909   const Function &F = MF.getFunction();
910   MachineRegisterInfo &MRI = MF.getRegInfo();
911   auto &DL = F.getParent()->getDataLayout();
912   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
913 
914   SmallVector<ArgInfo, 8> OutArgs;
915   for (auto &OrigArg : Info.OrigArgs) {
916     splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
917     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
918     if (OrigArg.Ty->isIntegerTy(1))
919       OutArgs.back().Flags[0].setZExt();
920   }
921 
922   SmallVector<ArgInfo, 8> InArgs;
923   if (!Info.OrigRet.Ty->isVoidTy())
924     splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
925 
926   // If we can lower as a tail call, do that instead.
927   bool CanTailCallOpt =
928       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
929 
930   // We must emit a tail call if we have musttail.
931   if (Info.IsMustTailCall && !CanTailCallOpt) {
932     // There are types of incoming/outgoing arguments we can't handle yet, so
933     // it doesn't make sense to actually die here like in ISelLowering. Instead,
934     // fall back to SelectionDAG and let it try to handle this.
935     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
936     return false;
937   }
938 
939   if (CanTailCallOpt)
940     return lowerTailCall(MIRBuilder, Info, OutArgs);
941 
942   // Find out which ABI gets to decide where things go.
943   CCAssignFn *AssignFnFixed;
944   CCAssignFn *AssignFnVarArg;
945   std::tie(AssignFnFixed, AssignFnVarArg) =
946       getAssignFnsForCC(Info.CallConv, TLI);
947 
948   MachineInstrBuilder CallSeqStart;
949   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
950 
951   // Create a temporarily-floating call instruction so we can add the implicit
952   // uses of arg registers.
953   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
954 
955   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
956   MIB.add(Info.Callee);
957 
958   // Tell the call which registers are clobbered.
959   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
960   const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
961   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
962     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
963   MIB.addRegMask(Mask);
964 
965   if (TRI->isAnyArgRegReserved(MF))
966     TRI->emitReservedArgRegCallError(MF);
967 
968   // Do the actual argument marshalling.
969   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
970                              AssignFnVarArg, false);
971   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
972     return false;
973 
974   // Now we can add the actual call instruction to the correct basic block.
975   MIRBuilder.insertInstr(MIB);
976 
977   // If Callee is a reg, since it is used by a target specific
978   // instruction, it must have a register class matching the
979   // constraint of that instruction.
980   if (Info.Callee.isReg())
981     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
982                              *MF.getSubtarget().getRegBankInfo(), *MIB,
983                              MIB->getDesc(), Info.Callee, 0);
984 
985   // Finally we can copy the returned value back into its virtual-register. In
986   // symmetry with the arguments, the physical register must be an
987   // implicit-define of the call instruction.
988   if (!Info.OrigRet.Ty->isVoidTy()) {
989     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
990     CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
991     if (!handleAssignments(MIRBuilder, InArgs, Handler))
992       return false;
993   }
994 
995   if (Info.SwiftErrorVReg) {
996     MIB.addDef(AArch64::X21, RegState::Implicit);
997     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
998   }
999 
1000   uint64_t CalleePopBytes =
1001       doesCalleeRestoreStack(Info.CallConv,
1002                              MF.getTarget().Options.GuaranteedTailCallOpt)
1003           ? alignTo(Handler.StackSize, 16)
1004           : 0;
1005 
1006   CallSeqStart.addImm(Handler.StackSize).addImm(0);
1007   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1008       .addImm(Handler.StackSize)
1009       .addImm(CalleePopBytes);
1010 
1011   return true;
1012 }
1013