xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (revision 3a9a9c0ca44ec535dcf73fe8462bee458e54814b)
1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64CallLowering.h"
16 #include "AArch64ISelLowering.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/Analysis.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/LowLevelType.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/TargetRegisterInfo.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/CodeGen/ValueTypes.h"
36 #include "llvm/IR/Argument.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Support/MachineValueType.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstdint>
45 #include <iterator>
46 
47 #define DEBUG_TYPE "aarch64-call-lowering"
48 
49 using namespace llvm;
50 
51 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
52   : CallLowering(&TLI) {}
53 
54 static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
55                                              MVT &LocVT) {
56   // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
57   // hack because the DAG calls the assignment function with pre-legalized
58   // register typed values, not the raw type.
59   //
60   // This hack is not applied to return values which are not passed on the
61   // stack.
62   if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
63     ValVT = LocVT = MVT::i8;
64   else if (OrigVT == MVT::i16)
65     ValVT = LocVT = MVT::i16;
66 }
67 
68 // Account for i1/i8/i16 stack passed value hack
69 static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
70   const MVT ValVT = VA.getValVT();
71   return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
72                                                  : LLT(VA.getLocVT());
73 }
74 
75 namespace {
76 
77 struct AArch64IncomingValueAssigner
78     : public CallLowering::IncomingValueAssigner {
79   AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
80                                CCAssignFn *AssignFnVarArg_)
81       : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
82 
83   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
84                  CCValAssign::LocInfo LocInfo,
85                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
86                  CCState &State) override {
87     applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
88     return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
89                                             LocInfo, Info, Flags, State);
90   }
91 };
92 
93 struct AArch64OutgoingValueAssigner
94     : public CallLowering::OutgoingValueAssigner {
95   const AArch64Subtarget &Subtarget;
96 
97   /// Track if this is used for a return instead of function argument
98   /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
99   /// stack passed returns for them and cannot apply the type adjustment.
100   bool IsReturn;
101 
102   AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
103                                CCAssignFn *AssignFnVarArg_,
104                                const AArch64Subtarget &Subtarget_,
105                                bool IsReturn)
106       : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
107         Subtarget(Subtarget_), IsReturn(IsReturn) {}
108 
109   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
110                  CCValAssign::LocInfo LocInfo,
111                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
112                  CCState &State) override {
113     bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
114     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
115 
116     if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
117       applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
118 
119     bool Res;
120     if (Info.IsFixed && !UseVarArgsCCForFixed)
121       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
122     else
123       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
124 
125     StackOffset = State.getNextStackOffset();
126     return Res;
127   }
128 };
129 
130 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
131   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
132       : IncomingValueHandler(MIRBuilder, MRI) {}
133 
134   Register getStackAddress(uint64_t Size, int64_t Offset,
135                            MachinePointerInfo &MPO,
136                            ISD::ArgFlagsTy Flags) override {
137     auto &MFI = MIRBuilder.getMF().getFrameInfo();
138 
139     // Byval is assumed to be writable memory, but other stack passed arguments
140     // are not.
141     const bool IsImmutable = !Flags.isByVal();
142 
143     int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
144     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
145     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
146     return AddrReg.getReg(0);
147   }
148 
149   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
150                              ISD::ArgFlagsTy Flags) const override {
151     // For pointers, we just need to fixup the integer types reported in the
152     // CCValAssign.
153     if (Flags.isPointer())
154       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
155     return getStackValueStoreTypeHack(VA);
156   }
157 
158   void assignValueToReg(Register ValVReg, Register PhysReg,
159                         CCValAssign VA) override {
160     markPhysRegUsed(PhysReg);
161     IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
162   }
163 
164   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
165                             MachinePointerInfo &MPO, CCValAssign &VA) override {
166     MachineFunction &MF = MIRBuilder.getMF();
167 
168     LLT ValTy(VA.getValVT());
169     LLT LocTy(VA.getLocVT());
170 
171     // Fixup the types for the DAG compatibility hack.
172     if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
173       std::swap(ValTy, LocTy);
174     else {
175       // The calling code knows if this is a pointer or not, we're only touching
176       // the LocTy for the i8/i16 hack.
177       assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
178       LocTy = MemTy;
179     }
180 
181     auto MMO = MF.getMachineMemOperand(
182         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
183         inferAlignFromPtrInfo(MF, MPO));
184 
185     switch (VA.getLocInfo()) {
186     case CCValAssign::LocInfo::ZExt:
187       MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
188       return;
189     case CCValAssign::LocInfo::SExt:
190       MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
191       return;
192     default:
193       MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
194       return;
195     }
196   }
197 
198   /// How the physical register gets marked varies between formal
199   /// parameters (it's a basic-block live-in), and a call instruction
200   /// (it's an implicit-def of the BL).
201   virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
202 };
203 
204 struct FormalArgHandler : public IncomingArgHandler {
205   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
206       : IncomingArgHandler(MIRBuilder, MRI) {}
207 
208   void markPhysRegUsed(MCRegister PhysReg) override {
209     MIRBuilder.getMRI()->addLiveIn(PhysReg);
210     MIRBuilder.getMBB().addLiveIn(PhysReg);
211   }
212 };
213 
214 struct CallReturnHandler : public IncomingArgHandler {
215   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
216                     MachineInstrBuilder MIB)
217       : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
218 
219   void markPhysRegUsed(MCRegister PhysReg) override {
220     MIB.addDef(PhysReg, RegState::Implicit);
221   }
222 
223   MachineInstrBuilder MIB;
224 };
225 
226 /// A special return arg handler for "returned" attribute arg calls.
227 struct ReturnedArgCallReturnHandler : public CallReturnHandler {
228   ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
229                                MachineRegisterInfo &MRI,
230                                MachineInstrBuilder MIB)
231       : CallReturnHandler(MIRBuilder, MRI, MIB) {}
232 
233   void markPhysRegUsed(MCRegister PhysReg) override {}
234 };
235 
236 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
237   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
238                      MachineInstrBuilder MIB, bool IsTailCall = false,
239                      int FPDiff = 0)
240       : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
241         FPDiff(FPDiff),
242         Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
243 
244   Register getStackAddress(uint64_t Size, int64_t Offset,
245                            MachinePointerInfo &MPO,
246                            ISD::ArgFlagsTy Flags) override {
247     MachineFunction &MF = MIRBuilder.getMF();
248     LLT p0 = LLT::pointer(0, 64);
249     LLT s64 = LLT::scalar(64);
250 
251     if (IsTailCall) {
252       assert(!Flags.isByVal() && "byval unhandled with tail calls");
253 
254       Offset += FPDiff;
255       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
256       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
257       MPO = MachinePointerInfo::getFixedStack(MF, FI);
258       return FIReg.getReg(0);
259     }
260 
261     if (!SPReg)
262       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
263 
264     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
265 
266     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
267 
268     MPO = MachinePointerInfo::getStack(MF, Offset);
269     return AddrReg.getReg(0);
270   }
271 
272   /// We need to fixup the reported store size for certain value types because
273   /// we invert the interpretation of ValVT and LocVT in certain cases. This is
274   /// for compatability with the DAG call lowering implementation, which we're
275   /// currently building on top of.
276   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
277                              ISD::ArgFlagsTy Flags) const override {
278     if (Flags.isPointer())
279       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
280     return getStackValueStoreTypeHack(VA);
281   }
282 
283   void assignValueToReg(Register ValVReg, Register PhysReg,
284                         CCValAssign VA) override {
285     MIB.addUse(PhysReg, RegState::Implicit);
286     Register ExtReg = extendRegister(ValVReg, VA);
287     MIRBuilder.buildCopy(PhysReg, ExtReg);
288   }
289 
290   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
291                             MachinePointerInfo &MPO, CCValAssign &VA) override {
292     MachineFunction &MF = MIRBuilder.getMF();
293     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
294                                        inferAlignFromPtrInfo(MF, MPO));
295     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
296   }
297 
298   void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
299                             Register Addr, LLT MemTy, MachinePointerInfo &MPO,
300                             CCValAssign &VA) override {
301     unsigned MaxSize = MemTy.getSizeInBytes() * 8;
302     // For varargs, we always want to extend them to 8 bytes, in which case
303     // we disable setting a max.
304     if (!Arg.IsFixed)
305       MaxSize = 0;
306 
307     Register ValVReg = Arg.Regs[RegIndex];
308     if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
309       MVT LocVT = VA.getLocVT();
310       MVT ValVT = VA.getValVT();
311 
312       if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
313         std::swap(ValVT, LocVT);
314         MemTy = LLT(VA.getValVT());
315       }
316 
317       ValVReg = extendRegister(ValVReg, VA, MaxSize);
318     } else {
319       // The store does not cover the full allocated stack slot.
320       MemTy = LLT(VA.getValVT());
321     }
322 
323     assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
324   }
325 
326   MachineInstrBuilder MIB;
327 
328   bool IsTailCall;
329 
330   /// For tail calls, the byte offset of the call's argument area from the
331   /// callee's. Unused elsewhere.
332   int FPDiff;
333 
334   // Cache the SP register vreg if we need it more than once in this call site.
335   Register SPReg;
336 
337   const AArch64Subtarget &Subtarget;
338 };
339 } // namespace
340 
341 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
342   return (CallConv == CallingConv::Fast && TailCallOpt) ||
343          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
344 }
345 
346 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
347                                       const Value *Val,
348                                       ArrayRef<Register> VRegs,
349                                       FunctionLoweringInfo &FLI,
350                                       Register SwiftErrorVReg) const {
351   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
352   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
353          "Return value without a vreg");
354 
355   bool Success = true;
356   if (!VRegs.empty()) {
357     MachineFunction &MF = MIRBuilder.getMF();
358     const Function &F = MF.getFunction();
359     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
360 
361     MachineRegisterInfo &MRI = MF.getRegInfo();
362     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
363     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
364     auto &DL = F.getParent()->getDataLayout();
365     LLVMContext &Ctx = Val->getType()->getContext();
366 
367     SmallVector<EVT, 4> SplitEVTs;
368     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
369     assert(VRegs.size() == SplitEVTs.size() &&
370            "For each split Type there should be exactly one VReg.");
371 
372     SmallVector<ArgInfo, 8> SplitArgs;
373     CallingConv::ID CC = F.getCallingConv();
374 
375     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
376       Register CurVReg = VRegs[i];
377       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
378       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
379 
380       // i1 is a special case because SDAG i1 true is naturally zero extended
381       // when widened using ANYEXT. We need to do it explicitly here.
382       if (MRI.getType(CurVReg).getSizeInBits() == 1) {
383         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
384       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
385                  1) {
386         // Some types will need extending as specified by the CC.
387         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
388         if (EVT(NewVT) != SplitEVTs[i]) {
389           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
390           if (F.getAttributes().hasRetAttr(Attribute::SExt))
391             ExtendOp = TargetOpcode::G_SEXT;
392           else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
393             ExtendOp = TargetOpcode::G_ZEXT;
394 
395           LLT NewLLT(NewVT);
396           LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
397           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
398           // Instead of an extend, we might have a vector type which needs
399           // padding with more elements, e.g. <2 x half> -> <4 x half>.
400           if (NewVT.isVector()) {
401             if (OldLLT.isVector()) {
402               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
403                 // We don't handle VA types which are not exactly twice the
404                 // size, but can easily be done in future.
405                 if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
406                   LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
407                   return false;
408                 }
409                 auto Undef = MIRBuilder.buildUndef({OldLLT});
410                 CurVReg =
411                     MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
412               } else {
413                 // Just do a vector extend.
414                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
415                               .getReg(0);
416               }
417             } else if (NewLLT.getNumElements() == 2) {
418               // We need to pad a <1 x S> type to <2 x S>. Since we don't have
419               // <1 x S> vector types in GISel we use a build_vector instead
420               // of a vector merge/concat.
421               auto Undef = MIRBuilder.buildUndef({OldLLT});
422               CurVReg =
423                   MIRBuilder
424                       .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
425                       .getReg(0);
426             } else {
427               LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
428               return false;
429             }
430           } else {
431             // If the split EVT was a <1 x T> vector, and NewVT is T, then we
432             // don't have to do anything since we don't distinguish between the
433             // two.
434             if (NewLLT != MRI.getType(CurVReg)) {
435               // A scalar extend.
436               CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
437                             .getReg(0);
438             }
439           }
440         }
441       }
442       if (CurVReg != CurArgInfo.Regs[0]) {
443         CurArgInfo.Regs[0] = CurVReg;
444         // Reset the arg flags after modifying CurVReg.
445         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
446       }
447       splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
448     }
449 
450     AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
451                                           /*IsReturn*/ true);
452     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
453     Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
454                                             MIRBuilder, CC, F.isVarArg());
455   }
456 
457   if (SwiftErrorVReg) {
458     MIB.addUse(AArch64::X21, RegState::Implicit);
459     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
460   }
461 
462   MIRBuilder.insertInstr(MIB);
463   return Success;
464 }
465 
466 /// Helper function to compute forwarded registers for musttail calls. Computes
467 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
468 /// can be used to save + restore registers later.
469 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
470                                              CCAssignFn *AssignFn) {
471   MachineBasicBlock &MBB = MIRBuilder.getMBB();
472   MachineFunction &MF = MIRBuilder.getMF();
473   MachineFrameInfo &MFI = MF.getFrameInfo();
474 
475   if (!MFI.hasMustTailInVarArgFunc())
476     return;
477 
478   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
479   const Function &F = MF.getFunction();
480   assert(F.isVarArg() && "Expected F to be vararg?");
481 
482   // Compute the set of forwarded registers. The rest are scratch.
483   SmallVector<CCValAssign, 16> ArgLocs;
484   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
485                  F.getContext());
486   SmallVector<MVT, 2> RegParmTypes;
487   RegParmTypes.push_back(MVT::i64);
488   RegParmTypes.push_back(MVT::f128);
489 
490   // Later on, we can use this vector to restore the registers if necessary.
491   SmallVectorImpl<ForwardedRegister> &Forwards =
492       FuncInfo->getForwardedMustTailRegParms();
493   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
494 
495   // Conservatively forward X8, since it might be used for an aggregate
496   // return.
497   if (!CCInfo.isAllocated(AArch64::X8)) {
498     Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
499     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
500   }
501 
502   // Add the forwards to the MachineBasicBlock and MachineFunction.
503   for (const auto &F : Forwards) {
504     MBB.addLiveIn(F.PReg);
505     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
506   }
507 }
508 
509 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
510   auto &F = MF.getFunction();
511   if (isa<ScalableVectorType>(F.getReturnType()))
512     return true;
513   if (llvm::any_of(F.args(), [](const Argument &A) {
514         return isa<ScalableVectorType>(A.getType());
515       }))
516     return true;
517   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
518   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
519     LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
520     return true;
521   }
522   return false;
523 }
524 
525 bool AArch64CallLowering::lowerFormalArguments(
526     MachineIRBuilder &MIRBuilder, const Function &F,
527     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
528   MachineFunction &MF = MIRBuilder.getMF();
529   MachineBasicBlock &MBB = MIRBuilder.getMBB();
530   MachineRegisterInfo &MRI = MF.getRegInfo();
531   auto &DL = F.getParent()->getDataLayout();
532 
533   SmallVector<ArgInfo, 8> SplitArgs;
534   SmallVector<std::pair<Register, Register>> BoolArgs;
535   unsigned i = 0;
536   for (auto &Arg : F.args()) {
537     if (DL.getTypeStoreSize(Arg.getType()).isZero())
538       continue;
539 
540     ArgInfo OrigArg{VRegs[i], Arg, i};
541     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
542 
543     // i1 arguments are zero-extended to i8 by the caller. Emit a
544     // hint to reflect this.
545     if (OrigArg.Ty->isIntegerTy(1)) {
546       assert(OrigArg.Regs.size() == 1 &&
547              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
548              "Unexpected registers used for i1 arg");
549 
550       if (!OrigArg.Flags[0].isZExt()) {
551         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
552         Register OrigReg = OrigArg.Regs[0];
553         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
554         OrigArg.Regs[0] = WideReg;
555         BoolArgs.push_back({OrigReg, WideReg});
556       }
557     }
558 
559     if (Arg.hasAttribute(Attribute::SwiftAsync))
560       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
561 
562     splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
563     ++i;
564   }
565 
566   if (!MBB.empty())
567     MIRBuilder.setInstr(*MBB.begin());
568 
569   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
570   CCAssignFn *AssignFn =
571       TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
572 
573   AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
574   FormalArgHandler Handler(MIRBuilder, MRI);
575   if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
576                                      F.getCallingConv(), F.isVarArg()))
577     return false;
578 
579   if (!BoolArgs.empty()) {
580     for (auto &KV : BoolArgs) {
581       Register OrigReg = KV.first;
582       Register WideReg = KV.second;
583       LLT WideTy = MRI.getType(WideReg);
584       assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
585              "Unexpected bit size of a bool arg");
586       MIRBuilder.buildTrunc(
587           OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
588     }
589   }
590 
591   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
592   uint64_t StackOffset = Assigner.StackOffset;
593   if (F.isVarArg()) {
594     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
595     if (!Subtarget.isTargetDarwin()) {
596         // FIXME: we need to reimplement saveVarArgsRegisters from
597       // AArch64ISelLowering.
598       return false;
599     }
600 
601     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
602     StackOffset =
603         alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);
604 
605     auto &MFI = MIRBuilder.getMF().getFrameInfo();
606     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
607   }
608 
609   if (doesCalleeRestoreStack(F.getCallingConv(),
610                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
611     // We have a non-standard ABI, so why not make full use of the stack that
612     // we're going to pop? It must be aligned to 16 B in any case.
613     StackOffset = alignTo(StackOffset, 16);
614 
615     // If we're expected to restore the stack (e.g. fastcc), then we'll be
616     // adding a multiple of 16.
617     FuncInfo->setArgumentStackToRestore(StackOffset);
618 
619     // Our own callers will guarantee that the space is free by giving an
620     // aligned value to CALLSEQ_START.
621   }
622 
623   // When we tail call, we need to check if the callee's arguments
624   // will fit on the caller's stack. So, whenever we lower formal arguments,
625   // we should keep track of this information, since we might lower a tail call
626   // in this function later.
627   FuncInfo->setBytesInStackArgArea(StackOffset);
628 
629   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
630   if (Subtarget.hasCustomCallingConv())
631     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
632 
633   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
634 
635   // Move back to the end of the basic block.
636   MIRBuilder.setMBB(MBB);
637 
638   return true;
639 }
640 
641 /// Return true if the calling convention is one that we can guarantee TCO for.
642 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
643   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
644          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
645 }
646 
647 /// Return true if we might ever do TCO for calls with this calling convention.
648 static bool mayTailCallThisCC(CallingConv::ID CC) {
649   switch (CC) {
650   case CallingConv::C:
651   case CallingConv::PreserveMost:
652   case CallingConv::Swift:
653   case CallingConv::SwiftTail:
654   case CallingConv::Tail:
655   case CallingConv::Fast:
656     return true;
657   default:
658     return false;
659   }
660 }
661 
662 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
663 /// CC.
664 static std::pair<CCAssignFn *, CCAssignFn *>
665 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
666   return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
667 }
668 
669 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
670     CallLoweringInfo &Info, MachineFunction &MF,
671     SmallVectorImpl<ArgInfo> &InArgs) const {
672   const Function &CallerF = MF.getFunction();
673   CallingConv::ID CalleeCC = Info.CallConv;
674   CallingConv::ID CallerCC = CallerF.getCallingConv();
675 
676   // If the calling conventions match, then everything must be the same.
677   if (CalleeCC == CallerCC)
678     return true;
679 
680   // Check if the caller and callee will handle arguments in the same way.
681   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
682   CCAssignFn *CalleeAssignFnFixed;
683   CCAssignFn *CalleeAssignFnVarArg;
684   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
685       getAssignFnsForCC(CalleeCC, TLI);
686 
687   CCAssignFn *CallerAssignFnFixed;
688   CCAssignFn *CallerAssignFnVarArg;
689   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
690       getAssignFnsForCC(CallerCC, TLI);
691 
692   AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
693                                               CalleeAssignFnVarArg);
694   AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
695                                               CallerAssignFnVarArg);
696 
697   if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
698     return false;
699 
700   // Make sure that the caller and callee preserve all of the same registers.
701   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
702   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
703   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
704   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
705     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
706     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
707   }
708 
709   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
710 }
711 
712 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
713     CallLoweringInfo &Info, MachineFunction &MF,
714     SmallVectorImpl<ArgInfo> &OutArgs) const {
715   // If there are no outgoing arguments, then we are done.
716   if (OutArgs.empty())
717     return true;
718 
719   const Function &CallerF = MF.getFunction();
720   LLVMContext &Ctx = CallerF.getContext();
721   CallingConv::ID CalleeCC = Info.CallConv;
722   CallingConv::ID CallerCC = CallerF.getCallingConv();
723   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
724   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
725 
726   CCAssignFn *AssignFnFixed;
727   CCAssignFn *AssignFnVarArg;
728   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
729 
730   // We have outgoing arguments. Make sure that we can tail call with them.
731   SmallVector<CCValAssign, 16> OutLocs;
732   CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
733 
734   AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
735                                               Subtarget, /*IsReturn*/ false);
736   if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
737     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
738     return false;
739   }
740 
741   // Make sure that they can fit on the caller's stack.
742   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
743   if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
744     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
745     return false;
746   }
747 
748   // Verify that the parameters in callee-saved registers match.
749   // TODO: Port this over to CallLowering as general code once swiftself is
750   // supported.
751   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
752   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
753   MachineRegisterInfo &MRI = MF.getRegInfo();
754 
755   if (Info.IsVarArg) {
756     // Be conservative and disallow variadic memory operands to match SDAG's
757     // behaviour.
758     // FIXME: If the caller's calling convention is C, then we can
759     // potentially use its argument area. However, for cases like fastcc,
760     // we can't do anything.
761     for (unsigned i = 0; i < OutLocs.size(); ++i) {
762       auto &ArgLoc = OutLocs[i];
763       if (ArgLoc.isRegLoc())
764         continue;
765 
766       LLVM_DEBUG(
767           dbgs()
768           << "... Cannot tail call vararg function with stack arguments\n");
769       return false;
770     }
771   }
772 
773   return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
774 }
775 
776 bool AArch64CallLowering::isEligibleForTailCallOptimization(
777     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
778     SmallVectorImpl<ArgInfo> &InArgs,
779     SmallVectorImpl<ArgInfo> &OutArgs) const {
780 
781   // Must pass all target-independent checks in order to tail call optimize.
782   if (!Info.IsTailCall)
783     return false;
784 
785   CallingConv::ID CalleeCC = Info.CallConv;
786   MachineFunction &MF = MIRBuilder.getMF();
787   const Function &CallerF = MF.getFunction();
788 
789   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
790 
791   if (Info.SwiftErrorVReg) {
792     // TODO: We should handle this.
793     // Note that this is also handled by the check for no outgoing arguments.
794     // Proactively disabling this though, because the swifterror handling in
795     // lowerCall inserts a COPY *after* the location of the call.
796     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
797     return false;
798   }
799 
800   if (!mayTailCallThisCC(CalleeCC)) {
801     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
802     return false;
803   }
804 
805   // Byval parameters hand the function a pointer directly into the stack area
806   // we want to reuse during a tail call. Working around this *is* possible (see
807   // X86).
808   //
809   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
810   // it?
811   //
812   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
813   // In this case, it is necessary to save/restore X0 in the callee. Tail
814   // call opt interferes with this. So we disable tail call opt when the
815   // caller has an argument with "inreg" attribute.
816   //
817   // FIXME: Check whether the callee also has an "inreg" argument.
818   //
819   // When the caller has a swifterror argument, we don't want to tail call
820   // because would have to move into the swifterror register before the
821   // tail call.
822   if (any_of(CallerF.args(), [](const Argument &A) {
823         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
824       })) {
825     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
826                          "inreg, or swifterror arguments\n");
827     return false;
828   }
829 
830   // Externally-defined functions with weak linkage should not be
831   // tail-called on AArch64 when the OS does not support dynamic
832   // pre-emption of symbols, as the AAELF spec requires normal calls
833   // to undefined weak functions to be replaced with a NOP or jump to the
834   // next instruction. The behaviour of branch instructions in this
835   // situation (as used for tail calls) is implementation-defined, so we
836   // cannot rely on the linker replacing the tail call with a return.
837   if (Info.Callee.isGlobal()) {
838     const GlobalValue *GV = Info.Callee.getGlobal();
839     const Triple &TT = MF.getTarget().getTargetTriple();
840     if (GV->hasExternalWeakLinkage() &&
841         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
842          TT.isOSBinFormatMachO())) {
843       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
844                            "with weak linkage for this OS.\n");
845       return false;
846     }
847   }
848 
849   // If we have -tailcallopt, then we're done.
850   if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
851     return CalleeCC == CallerF.getCallingConv();
852 
853   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
854   // Try to find cases where we can do that.
855 
856   // I want anyone implementing a new calling convention to think long and hard
857   // about this assert.
858   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
859          "Unexpected variadic calling convention");
860 
861   // Verify that the incoming and outgoing arguments from the callee are
862   // safe to tail call.
863   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
864     LLVM_DEBUG(
865         dbgs()
866         << "... Caller and callee have incompatible calling conventions.\n");
867     return false;
868   }
869 
870   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
871     return false;
872 
873   LLVM_DEBUG(
874       dbgs() << "... Call is eligible for tail call optimization.\n");
875   return true;
876 }
877 
878 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
879                               bool IsTailCall) {
880   if (!IsTailCall)
881     return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
882 
883   if (!IsIndirect)
884     return AArch64::TCRETURNdi;
885 
886   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
887   // x16 or x17.
888   if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
889     return AArch64::TCRETURNriBTI;
890 
891   return AArch64::TCRETURNri;
892 }
893 
894 static const uint32_t *
895 getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
896                AArch64CallLowering::CallLoweringInfo &Info,
897                const AArch64RegisterInfo &TRI, MachineFunction &MF) {
898   const uint32_t *Mask;
899   if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
900     // For 'this' returns, use the X0-preserving mask if applicable
901     Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
902     if (!Mask) {
903       OutArgs[0].Flags[0].setReturned(false);
904       Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
905     }
906   } else {
907     Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
908   }
909   return Mask;
910 }
911 
912 bool AArch64CallLowering::lowerTailCall(
913     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
914     SmallVectorImpl<ArgInfo> &OutArgs) const {
915   MachineFunction &MF = MIRBuilder.getMF();
916   const Function &F = MF.getFunction();
917   MachineRegisterInfo &MRI = MF.getRegInfo();
918   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
919   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
920 
921   // True when we're tail calling, but without -tailcallopt.
922   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
923                    Info.CallConv != CallingConv::Tail &&
924                    Info.CallConv != CallingConv::SwiftTail;
925 
926   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
927   // register class. Until we can do that, we should fall back here.
928   if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
929     LLVM_DEBUG(
930         dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
931     return false;
932   }
933 
934   // Find out which ABI gets to decide where things go.
935   CallingConv::ID CalleeCC = Info.CallConv;
936   CCAssignFn *AssignFnFixed;
937   CCAssignFn *AssignFnVarArg;
938   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
939 
940   MachineInstrBuilder CallSeqStart;
941   if (!IsSibCall)
942     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
943 
944   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
945   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
946   MIB.add(Info.Callee);
947 
948   // Byte offset for the tail call. When we are sibcalling, this will always
949   // be 0.
950   MIB.addImm(0);
951 
952   // Tell the call which registers are clobbered.
953   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
954   auto TRI = Subtarget.getRegisterInfo();
955   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
956   if (Subtarget.hasCustomCallingConv())
957     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
958   MIB.addRegMask(Mask);
959 
960   if (TRI->isAnyArgRegReserved(MF))
961     TRI->emitReservedArgRegCallError(MF);
962 
963   // FPDiff is the byte offset of the call's argument area from the callee's.
964   // Stores to callee stack arguments will be placed in FixedStackSlots offset
965   // by this amount for a tail call. In a sibling call it must be 0 because the
966   // caller will deallocate the entire stack and the callee still expects its
967   // arguments to begin at SP+0.
968   int FPDiff = 0;
969 
970   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
971   // by -tailcallopt. For sibcalls, the memory operands for the call are
972   // already available in the caller's incoming argument space.
973   unsigned NumBytes = 0;
974   if (!IsSibCall) {
975     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
976     // before handling assignments, because FPDiff must be known for memory
977     // arguments.
978     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
979     SmallVector<CCValAssign, 16> OutLocs;
980     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
981 
982     AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
983                                                 Subtarget, /*IsReturn*/ false);
984     if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
985       return false;
986 
987     // The callee will pop the argument stack as a tail call. Thus, we must
988     // keep it 16-byte aligned.
989     NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
990 
991     // FPDiff will be negative if this tail call requires more space than we
992     // would automatically have in our incoming argument space. Positive if we
993     // actually shrink the stack.
994     FPDiff = NumReusableBytes - NumBytes;
995 
996     // Update the required reserved area if this is the tail call requiring the
997     // most argument stack space.
998     if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
999       FuncInfo->setTailCallReservedStack(-FPDiff);
1000 
1001     // The stack pointer must be 16-byte aligned at all times it's used for a
1002     // memory operation, which in practice means at *all* times and in
1003     // particular across call boundaries. Therefore our own arguments started at
1004     // a 16-byte aligned SP and the delta applied for the tail call should
1005     // satisfy the same constraint.
1006     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1007   }
1008 
1009   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1010 
1011   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1012                                         Subtarget, /*IsReturn*/ false);
1013 
1014   // Do the actual argument marshalling.
1015   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1016                              /*IsTailCall*/ true, FPDiff);
1017   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1018                                      CalleeCC, Info.IsVarArg))
1019     return false;
1020 
1021   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1022 
1023   if (Info.IsVarArg && Info.IsMustTailCall) {
1024     // Now we know what's being passed to the function. Add uses to the call for
1025     // the forwarded registers that we *aren't* passing as parameters. This will
1026     // preserve the copies we build earlier.
1027     for (const auto &F : Forwards) {
1028       Register ForwardedReg = F.PReg;
1029       // If the register is already passed, or aliases a register which is
1030       // already being passed, then skip it.
1031       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
1032             if (!Use.isReg())
1033               return false;
1034             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
1035           }))
1036         continue;
1037 
1038       // We aren't passing it already, so we should add it to the call.
1039       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
1040       MIB.addReg(ForwardedReg, RegState::Implicit);
1041     }
1042   }
1043 
1044   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1045   // sequence start and end here.
1046   if (!IsSibCall) {
1047     MIB->getOperand(1).setImm(FPDiff);
1048     CallSeqStart.addImm(0).addImm(0);
1049     // End the call sequence *before* emitting the call. Normally, we would
1050     // tidy the frame up after the call. However, here, we've laid out the
1051     // parameters so that when SP is reset, they will be in the correct
1052     // location.
1053     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
1054   }
1055 
1056   // Now we can add the actual call instruction to the correct basic block.
1057   MIRBuilder.insertInstr(MIB);
1058 
1059   // If Callee is a reg, since it is used by a target specific instruction,
1060   // it must have a register class matching the constraint of that instruction.
1061   if (Info.Callee.isReg())
1062     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1063                              *MF.getSubtarget().getRegBankInfo(), *MIB,
1064                              MIB->getDesc(), Info.Callee, 0);
1065 
1066   MF.getFrameInfo().setHasTailCall();
1067   Info.LoweredTailCall = true;
1068   return true;
1069 }
1070 
1071 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1072                                     CallLoweringInfo &Info) const {
1073   MachineFunction &MF = MIRBuilder.getMF();
1074   const Function &F = MF.getFunction();
1075   MachineRegisterInfo &MRI = MF.getRegInfo();
1076   auto &DL = F.getParent()->getDataLayout();
1077   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1078 
1079   SmallVector<ArgInfo, 8> OutArgs;
1080   for (auto &OrigArg : Info.OrigArgs) {
1081     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1082     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1083     if (OrigArg.Ty->isIntegerTy(1)) {
1084       ArgInfo &OutArg = OutArgs.back();
1085       assert(OutArg.Regs.size() == 1 &&
1086              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1087              "Unexpected registers used for i1 arg");
1088 
1089       // We cannot use a ZExt ArgInfo flag here, because it will
1090       // zero-extend the argument to i32 instead of just i8.
1091       OutArg.Regs[0] =
1092           MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
1093       LLVMContext &Ctx = MF.getFunction().getContext();
1094       OutArg.Ty = Type::getInt8Ty(Ctx);
1095     }
1096   }
1097 
1098   SmallVector<ArgInfo, 8> InArgs;
1099   if (!Info.OrigRet.Ty->isVoidTy())
1100     splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1101 
1102   // If we can lower as a tail call, do that instead.
1103   bool CanTailCallOpt =
1104       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1105 
1106   // We must emit a tail call if we have musttail.
1107   if (Info.IsMustTailCall && !CanTailCallOpt) {
1108     // There are types of incoming/outgoing arguments we can't handle yet, so
1109     // it doesn't make sense to actually die here like in ISelLowering. Instead,
1110     // fall back to SelectionDAG and let it try to handle this.
1111     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1112     return false;
1113   }
1114 
1115   Info.IsTailCall = CanTailCallOpt;
1116   if (CanTailCallOpt)
1117     return lowerTailCall(MIRBuilder, Info, OutArgs);
1118 
1119   // Find out which ABI gets to decide where things go.
1120   CCAssignFn *AssignFnFixed;
1121   CCAssignFn *AssignFnVarArg;
1122   std::tie(AssignFnFixed, AssignFnVarArg) =
1123       getAssignFnsForCC(Info.CallConv, TLI);
1124 
1125   MachineInstrBuilder CallSeqStart;
1126   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1127 
1128   // Create a temporarily-floating call instruction so we can add the implicit
1129   // uses of arg registers.
1130   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1131   unsigned Opc = 0;
1132   // A call to a returns twice function like setjmp must be followed by a bti
1133   // instruction.
1134   if (Info.CB && Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
1135       !Subtarget.noBTIAtReturnTwice() &&
1136       MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1137     Opc = AArch64::BLR_BTI;
1138   else
1139     Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
1140 
1141   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1142   MIB.add(Info.Callee);
1143 
1144   // Tell the call which registers are clobbered.
1145   const uint32_t *Mask;
1146   const auto *TRI = Subtarget.getRegisterInfo();
1147 
1148   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1149                                         Subtarget, /*IsReturn*/ false);
1150   // Do the actual argument marshalling.
1151   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false);
1152   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1153                                      Info.CallConv, Info.IsVarArg))
1154     return false;
1155 
1156   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1157 
1158   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1159     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1160   MIB.addRegMask(Mask);
1161 
1162   if (TRI->isAnyArgRegReserved(MF))
1163     TRI->emitReservedArgRegCallError(MF);
1164 
1165   // Now we can add the actual call instruction to the correct basic block.
1166   MIRBuilder.insertInstr(MIB);
1167 
1168   // If Callee is a reg, since it is used by a target specific
1169   // instruction, it must have a register class matching the
1170   // constraint of that instruction.
1171   if (Info.Callee.isReg())
1172     constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
1173                              *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
1174                              Info.Callee, 0);
1175 
1176   // Finally we can copy the returned value back into its virtual-register. In
1177   // symmetry with the arguments, the physical register must be an
1178   // implicit-define of the call instruction.
1179   if (!Info.OrigRet.Ty->isVoidTy()) {
1180     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
1181     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1182     bool UsingReturnedArg =
1183         !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1184 
1185     AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1186                                           /*IsReturn*/ false);
1187     ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1188     if (!determineAndHandleAssignments(
1189             UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
1190             MIRBuilder, Info.CallConv, Info.IsVarArg,
1191             UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None))
1192       return false;
1193   }
1194 
1195   if (Info.SwiftErrorVReg) {
1196     MIB.addDef(AArch64::X21, RegState::Implicit);
1197     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
1198   }
1199 
1200   uint64_t CalleePopBytes =
1201       doesCalleeRestoreStack(Info.CallConv,
1202                              MF.getTarget().Options.GuaranteedTailCallOpt)
1203           ? alignTo(Assigner.StackOffset, 16)
1204           : 0;
1205 
1206   CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
1207   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1208       .addImm(Assigner.StackOffset)
1209       .addImm(CalleePopBytes);
1210 
1211   return true;
1212 }
1213 
1214 bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1215   return Ty.getSizeInBits() == 64;
1216 }
1217