//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
  : CallLowering(&TLI) {}

static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
                                             MVT &LocVT) {
  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  // hack because the DAG calls the assignment function with pre-legalized
  // register typed values, not the raw type.
  //
  // This hack is not applied to return values which are not passed on the
  // stack.
  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
    ValVT = LocVT = MVT::i8;
  else if (OrigVT == MVT::i16)
    ValVT = LocVT = MVT::i16;
}

// Account for i1/i8/i16 stack passed value hack
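// For i8/i16 values the store type is the original value type rather than the
// widened LocVT, so only the narrow width of the stack slot is accessed. This
// matches what the SelectionDAG lowering does.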
static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
  const MVT ValVT = VA.getValVT();
  return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
                                                 : LLT(VA.getLocVT());
}

namespace {

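/// Incoming argument assigner: applies the i1/i8/i16 stack-passed small-type
/// hack before delegating to the generic IncomingValueAssigner.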
struct AArch64IncomingValueAssigner
    : public CallLowering::IncomingValueAssigner {
  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_)
      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
                                            LocInfo, Info, Flags, State);
  }
};

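/// Outgoing argument assigner: picks between the fixed and vararg CCAssignFn
/// (Win64 varargs use the vararg convention even for fixed arguments), applies
/// the small-type stack hack for non-vararg, non-return values, and records
/// the final stack offset.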
struct AArch64OutgoingValueAssigner
    : public CallLowering::OutgoingValueAssigner {
  const AArch64Subtarget &Subtarget;

  /// Track if this is used for a return instead of function argument
  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  /// stack passed returns for them and cannot apply the type adjustment.
  bool IsReturn;

  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_,
                               const AArch64Subtarget &Subtarget_,
                               bool IsReturn)
      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
        Subtarget(Subtarget_), IsReturn(IsReturn) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();

    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);

    bool Res;
    if (Info.IsFixed && !UseVarArgsCCForFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackOffset = State.getNextStackOffset();
    return Res;
  }
};

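/// Common handler for incoming values (formal arguments and call results):
/// loads stack-passed values from fixed frame objects and copies values
/// passed in registers out of their physical registers. Subclasses decide how
/// a used physical register gets marked.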
struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingValueHandler(MIRBuilder, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();

    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
  }

  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    // For pointers, we just need to fixup the integer types reported in the
    // CCValAssign.
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    LLT ValTy(VA.getValVT());
    LLT LocTy(VA.getLocVT());

    // Fixup the types for the DAG compatibility hack.
    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
      std::swap(ValTy, LocTy);
    else {
      // The calling code knows if this is a pointer or not, we're only touching
      // the LocTy for the i8/i16 hack.
      assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
      LocTy = MemTy;
    }

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
};

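/// Incoming value handler for formal arguments: argument registers are marked
/// as live-in to the function and to the current basic block.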
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingArgHandler(MIRBuilder, MRI) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

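/// Incoming value handler for call results: the physical registers carrying
/// return values are added as implicit defs of the call instruction.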
struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

/// A special return arg handler for "returned" attribute arg calls.
struct ReturnedArgCallReturnHandler : public CallReturnHandler {
  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
                               MachineRegisterInfo &MRI,
                               MachineInstrBuilder MIB)
      : CallReturnHandler(MIRBuilder, MRI, MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {}
};

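/// Outgoing value handler shared by calls, tail calls, and returns: values are
/// either copied into their assigned physical registers (which become implicit
/// uses of the call or return instruction) or stored to the outgoing stack
/// area.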
struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, bool IsTailCall = false,
                     int FPDiff = 0)
      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
        FPDiff(FPDiff),
        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      assert(!Flags.isByVal() && "byval unhandled with tail calls");

      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  /// We need to fixup the reported store size for certain value types because
  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
  /// for compatibility with the DAG call lowering implementation, which we're
  /// currently building on top of.
  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
                            Register Addr, LLT MemTy, MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    unsigned MaxSize = MemTy.getSizeInBytes() * 8;
    // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
    if (!Arg.IsFixed)
      MaxSize = 0;

    Register ValVReg = Arg.Regs[RegIndex];
    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
      MVT LocVT = VA.getLocVT();
      MVT ValVT = VA.getValVT();

      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
        std::swap(ValVT, LocVT);
        MemTy = LLT(VA.getValVT());
      }

      ValVReg = extendRegister(ValVReg, VA, MaxSize);
    } else {
      // The store does not cover the full allocated stack slot.
      MemTy = LLT(VA.getValVT());
    }

    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }

  MachineInstrBuilder MIB;

  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  const AArch64Subtarget &Subtarget;
};
} // namespace

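/// Returns true when the callee is responsible for popping its own stack
/// arguments: fastcc with -tailcallopt enabled, tailcc, and swifttailcc.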
static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return (CallConv == CallingConv::Fast && TailCallOpt) ||
         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}

bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      FunctionLoweringInfo &FLI,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                 1) {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We only handle padding to exactly twice the number of
                // elements for now; other factors can easily be supported in
                // the future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs()
                             << "Outgoing vector ret has too many elts\n");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // If the split EVT was a <1 x T> vector, and NewVT is T, then we
            // don't have to do anything since we don't distinguish between the
            // two.
            if (NewLLT != MRI.getType(CurVReg)) {
              // A scalar extend.
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
            }
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
    }

    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
                                          /*IsReturn*/ true);
    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

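/// Conservatively request a SelectionDAG fallback for cases GlobalISel does
/// not handle yet: scalable vector arguments or return values, and subtargets
/// without NEON or FPARMv8.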
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
  auto &F = MF.getFunction();
  if (isa<ScalableVectorType>(F.getReturnType()))
    return true;
  if (llvm::any_of(F.args(), [](const Argument &A) {
        return isa<ScalableVectorType>(A.getType());
      }))
    return true;
  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(dbgs() << "Falling back to SDAG because the subtarget lacks "
                         "NEON or FPARMv8 support\n");
    return true;
  }
  return false;
}

bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;

    ArgInfo OrigArg{VRegs[i], Arg, i};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    if (Arg.hasAttribute(Attribute::SwiftAsync))
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
                                     F.getCallingConv(), F.isVarArg()))
    return false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Assigner.StackOffset;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    return false;
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
/// CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                              CalleeAssignFnVarArg);
  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                              CallerAssignFnVarArg);

  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  LLVMContext &Ctx = CallerF.getContext();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);

  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                              Subtarget, /*IsReturn*/ false);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.IsVarArg) {
    // Be conservative and disallow variadic memory operands to match SDAG's
    // behaviour.
    // FIXME: If the caller's calling convention is C, then we can
    // potentially use its argument area. However, for cases like fastcc,
    // we can't do anything.
    for (unsigned i = 0; i < OutLocs.size(); ++i) {
      auto &ArgLoc = OutLocs[i];
      if (ArgLoc.isRegLoc())
        continue;

      LLVM_DEBUG(
          dbgs()
          << "... Cannot tail call vararg function with stack arguments\n");
      return false;
    }
  }

  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}

bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

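/// Pick the opcode for the call instruction: BL/BLR-style opcodes for normal
/// calls and TCRETURN pseudos for tail calls, using the BTI-restricted variant
/// when branch target enforcement requires the target to live in x16 or x17.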
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

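/// Choose the register mask for a call: if the first argument carries the
/// 'returned' attribute and an X0-preserving mask exists for this calling
/// convention, use it; otherwise drop the flag and fall back to the regular
/// call-preserved mask.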
static const uint32_t *
getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
               AArch64CallLowering::CallLoweringInfo &Info,
               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
  const uint32_t *Mask;
  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
    // For 'this' returns, use the X0-preserving mask if applicable
    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
    if (!Mask) {
      OutArgs[0].Flags[0].setReturned(false);
      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
    }
  } else {
    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
  }
  return Mask;
}

bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
                   Info.CallConv != CallingConv::Tail &&
                   Info.CallConv != CallingConv::SwiftTail;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call for
    // the forwarded registers that we *aren't* passing as parameters. This will
    // preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), Info.Callee, 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires the caller to zero-extend i1 parameters to 8 bits.
    if (OrigArg.Ty->isIntegerTy(1))
      OutArgs.back().Flags[0].setZExt();
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             Info.Callee, 0);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackOffset, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);

  return true;
}

bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}
1159