xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (revision 7ef62cebc2f965b0f640263e179276928885e33d)
1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64CallLowering.h"
16 #include "AArch64ISelLowering.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/ObjCARCUtil.h"
22 #include "llvm/CodeGen/Analysis.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/FunctionLoweringInfo.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/GlobalISel/Utils.h"
27 #include "llvm/CodeGen/LowLevelType.h"
28 #include "llvm/CodeGen/MachineBasicBlock.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineMemOperand.h"
33 #include "llvm/CodeGen/MachineOperand.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/TargetRegisterInfo.h"
36 #include "llvm/CodeGen/TargetSubtargetInfo.h"
37 #include "llvm/CodeGen/ValueTypes.h"
38 #include "llvm/IR/Argument.h"
39 #include "llvm/IR/Attributes.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/Support/MachineValueType.h"
44 #include <algorithm>
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 
49 #define DEBUG_TYPE "aarch64-call-lowering"
50 
51 using namespace llvm;
52 
53 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
54   : CallLowering(&TLI) {}
55 
56 static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
57                                              MVT &LocVT) {
58   // If the original type is i1/i8/i16, force ValVT and LocVT to i8/i8/i16.
59   // This is a legacy hack because the DAG calls the assignment function with
60   // pre-legalized register-typed values rather than with the raw type.
61   //
62   // This hack is not applied to return values, which are not passed on the
63   // stack.
64   if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
65     ValVT = LocVT = MVT::i8;
66   else if (OrigVT == MVT::i16)
67     ValVT = LocVT = MVT::i16;
68 }
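// A minimal sketch of the hack's effect (hypothetical values, not taken from a
// real assignment): an i1 original type forces both the value and location
// types to i8, matching what the DAG-built CC tables expect:
//   MVT ValVT = MVT::i1, LocVT = MVT::i1;
//   applyStackPassedSmallTypeDAGHack(MVT::i1, ValVT, LocVT);
//   // Afterwards ValVT == MVT::i8 && LocVT == MVT::i8; an i16 stays i16.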
69 
70 // Account for i1/i8/i16 stack passed value hack
71 static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
72   const MVT ValVT = VA.getValVT();
73   return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
74                                                  : LLT(VA.getLocVT());
75 }
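// Illustrative use (assumed CCValAssign contents): when ValVT is i8 or i16 the
// store/load type is the narrow type, otherwise the location type is used:
//   LLT Ty = getStackValueStoreTypeHack(VA); // LLT::scalar(16) if ValVT == i16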
76 
77 namespace {
78 
79 struct AArch64IncomingValueAssigner
80     : public CallLowering::IncomingValueAssigner {
81   AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
82                                CCAssignFn *AssignFnVarArg_)
83       : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
84 
85   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
86                  CCValAssign::LocInfo LocInfo,
87                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
88                  CCState &State) override {
89     applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
90     return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
91                                             LocInfo, Info, Flags, State);
92   }
93 };
94 
95 struct AArch64OutgoingValueAssigner
96     : public CallLowering::OutgoingValueAssigner {
97   const AArch64Subtarget &Subtarget;
98 
99   /// Track whether this is used for a return instead of function argument
100   /// passing. We apply the i1/i8/i16 hack to stack-passed values, but returns
101   /// of those types are not passed on the stack, so we skip the adjustment.
102   bool IsReturn;
103 
104   AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
105                                CCAssignFn *AssignFnVarArg_,
106                                const AArch64Subtarget &Subtarget_,
107                                bool IsReturn)
108       : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
109         Subtarget(Subtarget_), IsReturn(IsReturn) {}
110 
111   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
112                  CCValAssign::LocInfo LocInfo,
113                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
114                  CCState &State) override {
115     bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
116     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
117 
118     bool Res;
119     if (Info.IsFixed && !UseVarArgsCCForFixed) {
120       if (!IsReturn)
121         applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
122       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
123     } else
124       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
125 
126     StackOffset = State.getNextStackOffset();
127     return Res;
128   }
129 };
130 
131 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
132   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
133       : IncomingValueHandler(MIRBuilder, MRI) {}
134 
135   Register getStackAddress(uint64_t Size, int64_t Offset,
136                            MachinePointerInfo &MPO,
137                            ISD::ArgFlagsTy Flags) override {
138     auto &MFI = MIRBuilder.getMF().getFrameInfo();
139 
140     // Byval is assumed to be writable memory, but other stack passed arguments
141     // are not.
142     const bool IsImmutable = !Flags.isByVal();
143 
144     int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
145     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
146     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
147     return AddrReg.getReg(0);
148   }
149 
150   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
151                              ISD::ArgFlagsTy Flags) const override {
152     // For pointers, we just need to fixup the integer types reported in the
153     // CCValAssign.
154     if (Flags.isPointer())
155       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
156     return getStackValueStoreTypeHack(VA);
157   }
158 
159   void assignValueToReg(Register ValVReg, Register PhysReg,
160                         CCValAssign VA) override {
161     markPhysRegUsed(PhysReg);
162     IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
163   }
164 
165   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
166                             MachinePointerInfo &MPO, CCValAssign &VA) override {
167     MachineFunction &MF = MIRBuilder.getMF();
168 
169     LLT ValTy(VA.getValVT());
170     LLT LocTy(VA.getLocVT());
171 
172     // Fixup the types for the DAG compatibility hack.
173     if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
174       std::swap(ValTy, LocTy);
175     else {
176       // The calling code knows whether this is a pointer or not; we only touch
177       // LocTy here for the i8/i16 hack.
178       assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
179       LocTy = MemTy;
180     }
181 
182     auto MMO = MF.getMachineMemOperand(
183         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
184         inferAlignFromPtrInfo(MF, MPO));
185 
186     switch (VA.getLocInfo()) {
187     case CCValAssign::LocInfo::ZExt:
188       MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
189       return;
190     case CCValAssign::LocInfo::SExt:
191       MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
192       return;
193     default:
194       MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
195       return;
196     }
197   }
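  // Rough shape of the emitted generic MIR (illustrative only): a sign- or
  // zero-extended small stack argument becomes an extending load of its slot,
  // e.g. for a signext i16:
  //   %v = G_SEXTLOAD %addr(p0) :: (invariant load (s16) from %fixed-stack.N)
  // Other LocInfo kinds fall through to a plain G_LOAD.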
198 
199   /// How the physical register gets marked varies between formal
200   /// parameters (it's a basic-block live-in), and a call instruction
201   /// (it's an implicit-def of the BL).
202   virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
203 };
204 
205 struct FormalArgHandler : public IncomingArgHandler {
206   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
207       : IncomingArgHandler(MIRBuilder, MRI) {}
208 
209   void markPhysRegUsed(MCRegister PhysReg) override {
210     MIRBuilder.getMRI()->addLiveIn(PhysReg);
211     MIRBuilder.getMBB().addLiveIn(PhysReg);
212   }
213 };
214 
215 struct CallReturnHandler : public IncomingArgHandler {
216   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
217                     MachineInstrBuilder MIB)
218       : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
219 
220   void markPhysRegUsed(MCRegister PhysReg) override {
221     MIB.addDef(PhysReg, RegState::Implicit);
222   }
223 
224   MachineInstrBuilder MIB;
225 };
226 
227 /// A special return arg handler for "returned" attribute arg calls.
228 struct ReturnedArgCallReturnHandler : public CallReturnHandler {
229   ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
230                                MachineRegisterInfo &MRI,
231                                MachineInstrBuilder MIB)
232       : CallReturnHandler(MIRBuilder, MRI, MIB) {}
233 
234   void markPhysRegUsed(MCRegister PhysReg) override {}
235 };
236 
237 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
238   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
239                      MachineInstrBuilder MIB, bool IsTailCall = false,
240                      int FPDiff = 0)
241       : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
242         FPDiff(FPDiff),
243         Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
244 
245   Register getStackAddress(uint64_t Size, int64_t Offset,
246                            MachinePointerInfo &MPO,
247                            ISD::ArgFlagsTy Flags) override {
248     MachineFunction &MF = MIRBuilder.getMF();
249     LLT p0 = LLT::pointer(0, 64);
250     LLT s64 = LLT::scalar(64);
251 
252     if (IsTailCall) {
253       assert(!Flags.isByVal() && "byval unhandled with tail calls");
254 
255       Offset += FPDiff;
256       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
257       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
258       MPO = MachinePointerInfo::getFixedStack(MF, FI);
259       return FIReg.getReg(0);
260     }
261 
262     if (!SPReg)
263       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
264 
265     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
266 
267     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
268 
269     MPO = MachinePointerInfo::getStack(MF, Offset);
270     return AddrReg.getReg(0);
271   }
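  // For a non-tail call the address computation above produces, roughly:
  //   %sp:_(p0)   = COPY $sp
  //   %off:_(s64) = G_CONSTANT i64 <Offset>
  //   %addr:_(p0) = G_PTR_ADD %sp, %off
  // The SP copy is cached in SPReg and reused for subsequent stack arguments.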
272 
273   /// We need to fix up the reported store size for certain value types
274   /// because we invert the interpretation of ValVT and LocVT in certain cases.
275   /// This is for compatibility with the DAG call lowering implementation,
276   /// which we're currently building on top of.
277   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
278                              ISD::ArgFlagsTy Flags) const override {
279     if (Flags.isPointer())
280       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
281     return getStackValueStoreTypeHack(VA);
282   }
283 
284   void assignValueToReg(Register ValVReg, Register PhysReg,
285                         CCValAssign VA) override {
286     MIB.addUse(PhysReg, RegState::Implicit);
287     Register ExtReg = extendRegister(ValVReg, VA);
288     MIRBuilder.buildCopy(PhysReg, ExtReg);
289   }
290 
291   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
292                             MachinePointerInfo &MPO, CCValAssign &VA) override {
293     MachineFunction &MF = MIRBuilder.getMF();
294     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
295                                        inferAlignFromPtrInfo(MF, MPO));
296     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
297   }
298 
299   void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
300                             Register Addr, LLT MemTy, MachinePointerInfo &MPO,
301                             CCValAssign &VA) override {
302     unsigned MaxSize = MemTy.getSizeInBytes() * 8;
303     // Varargs are always extended to 8 bytes, so disable the size cap for
304     // them.
305     if (!Arg.IsFixed)
306       MaxSize = 0;
307 
308     Register ValVReg = Arg.Regs[RegIndex];
309     if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
310       MVT LocVT = VA.getLocVT();
311       MVT ValVT = VA.getValVT();
312 
313       if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
314         std::swap(ValVT, LocVT);
315         MemTy = LLT(VA.getValVT());
316       }
317 
318       ValVReg = extendRegister(ValVReg, VA, MaxSize);
319     } else {
320       // The store does not cover the full allocated stack slot.
321       MemTy = LLT(VA.getValVT());
322     }
323 
324     assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
325   }
326 
327   MachineInstrBuilder MIB;
328 
329   bool IsTailCall;
330 
331   /// For tail calls, the byte offset of the call's argument area from the
332   /// callee's. Unused elsewhere.
333   int FPDiff;
334 
335   // Cache the SP register vreg if we need it more than once in this call site.
336   Register SPReg;
337 
338   const AArch64Subtarget &Subtarget;
339 };
340 } // namespace
341 
342 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
343   return (CallConv == CallingConv::Fast && TailCallOpt) ||
344          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
345 }
346 
347 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
348                                       const Value *Val,
349                                       ArrayRef<Register> VRegs,
350                                       FunctionLoweringInfo &FLI,
351                                       Register SwiftErrorVReg) const {
352   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
353   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
354          "Return value without a vreg");
355 
356   bool Success = true;
357   if (!FLI.CanLowerReturn) {
358     insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
359   } else if (!VRegs.empty()) {
360     MachineFunction &MF = MIRBuilder.getMF();
361     const Function &F = MF.getFunction();
362     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
363 
364     MachineRegisterInfo &MRI = MF.getRegInfo();
365     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
366     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
367     auto &DL = F.getParent()->getDataLayout();
368     LLVMContext &Ctx = Val->getType()->getContext();
369 
370     SmallVector<EVT, 4> SplitEVTs;
371     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
372     assert(VRegs.size() == SplitEVTs.size() &&
373            "For each split Type there should be exactly one VReg.");
374 
375     SmallVector<ArgInfo, 8> SplitArgs;
376     CallingConv::ID CC = F.getCallingConv();
377 
378     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
379       Register CurVReg = VRegs[i];
380       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
381       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
382 
383       // i1 is a special case because SDAG i1 true is naturally zero extended
384       // when widened using ANYEXT. We need to do it explicitly here.
385       auto &Flags = CurArgInfo.Flags[0];
386       if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
387           !Flags.isZExt()) {
388         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
389       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
390                  1) {
391         // Some types will need extending as specified by the CC.
392         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
393         if (EVT(NewVT) != SplitEVTs[i]) {
394           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
395           if (F.getAttributes().hasRetAttr(Attribute::SExt))
396             ExtendOp = TargetOpcode::G_SEXT;
397           else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
398             ExtendOp = TargetOpcode::G_ZEXT;
399 
400           LLT NewLLT(NewVT);
401           LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
402           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
403           // Instead of an extend, we might have a vector type which needs
404           // padding with more elements, e.g. <2 x half> -> <4 x half>.
405           if (NewVT.isVector()) {
406             if (OldLLT.isVector()) {
407               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
408                 // We only handle widening to exactly twice the number of
409                 // elements; other cases could be supported in the future.
410                 if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
411                   LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
412                   return false;
413                 }
414                 auto Undef = MIRBuilder.buildUndef({OldLLT});
415                 CurVReg =
416                     MIRBuilder.buildMergeLikeInstr({NewLLT}, {CurVReg, Undef})
417                         .getReg(0);
418               } else {
419                 // Just do a vector extend.
420                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
421                               .getReg(0);
422               }
423             } else if (NewLLT.getNumElements() == 2) {
424               // We need to pad a <1 x S> type to <2 x S>. Since we don't have
425               // <1 x S> vector types in GISel we use a build_vector instead
426               // of a vector merge/concat.
427               auto Undef = MIRBuilder.buildUndef({OldLLT});
428               CurVReg =
429                   MIRBuilder
430                       .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
431                       .getReg(0);
432             } else {
433               LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
434               return false;
435             }
436           } else {
437             // If the split EVT was a <1 x T> vector, and NewVT is T, then we
438             // don't have to do anything since we don't distinguish between the
439             // two.
440             if (NewLLT != MRI.getType(CurVReg)) {
441               // A scalar extend.
442               CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
443                             .getReg(0);
444             }
445           }
446         }
447       }
448       if (CurVReg != CurArgInfo.Regs[0]) {
449         CurArgInfo.Regs[0] = CurVReg;
450         // Reset the arg flags after modifying CurVReg.
451         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
452       }
453       splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
454     }
455 
456     AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
457                                           /*IsReturn*/ true);
458     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
459     Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
460                                             MIRBuilder, CC, F.isVarArg());
461   }
462 
463   if (SwiftErrorVReg) {
464     MIB.addUse(AArch64::X21, RegState::Implicit);
465     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
466   }
467 
468   MIRBuilder.insertInstr(MIB);
469   return Success;
470 }
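// Sketch of the common case (illustrative, not generated output): returning a
// plain i32 under the C calling convention becomes a copy into the designated
// return register followed by the return pseudo:
//   $w0 = COPY %v(s32)
//   RET_ReallyLR implicit $w0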
471 
472 bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
473                                          CallingConv::ID CallConv,
474                                          SmallVectorImpl<BaseArgInfo> &Outs,
475                                          bool IsVarArg) const {
476   SmallVector<CCValAssign, 16> ArgLocs;
477   const auto &TLI = *getTLI<AArch64TargetLowering>();
478   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
479                  MF.getFunction().getContext());
480 
481   return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
482 }
483 
484 /// Helper function to compute forwarded registers for musttail calls. Computes
485 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
486 /// can be used to save + restore registers later.
487 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
488                                              CCAssignFn *AssignFn) {
489   MachineBasicBlock &MBB = MIRBuilder.getMBB();
490   MachineFunction &MF = MIRBuilder.getMF();
491   MachineFrameInfo &MFI = MF.getFrameInfo();
492 
493   if (!MFI.hasMustTailInVarArgFunc())
494     return;
495 
496   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
497   const Function &F = MF.getFunction();
498   assert(F.isVarArg() && "Expected F to be vararg?");
499 
500   // Compute the set of forwarded registers. The rest are scratch.
501   SmallVector<CCValAssign, 16> ArgLocs;
502   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
503                  F.getContext());
504   SmallVector<MVT, 2> RegParmTypes;
505   RegParmTypes.push_back(MVT::i64);
506   RegParmTypes.push_back(MVT::f128);
507 
508   // Later on, we can use this vector to restore the registers if necessary.
509   SmallVectorImpl<ForwardedRegister> &Forwards =
510       FuncInfo->getForwardedMustTailRegParms();
511   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
512 
513   // Conservatively forward X8, since it might be used for an aggregate
514   // return.
515   if (!CCInfo.isAllocated(AArch64::X8)) {
516     Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
517     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
518   }
519 
520   // Add the forwards to the MachineBasicBlock and MachineFunction.
521   for (const auto &F : Forwards) {
522     MBB.addLiveIn(F.PReg);
523     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
524   }
525 }
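// Illustrative result (register choices assumed): in a vararg function that
// contains a musttail call, each forwarded physical register becomes a block
// live-in with a copy into a fresh vreg, e.g.
//   %fwd0:gpr64 = COPY $x7
//   %fwd1:fpr128 = COPY $q0
// and X8 is forwarded as well in case it carries an indirect-return pointer.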
526 
527 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
528   auto &F = MF.getFunction();
529   if (isa<ScalableVectorType>(F.getReturnType()))
530     return true;
531   if (llvm::any_of(F.args(), [](const Argument &A) {
532         return isa<ScalableVectorType>(A.getType());
533       }))
534     return true;
535   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
536   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
537     LLVM_DEBUG(dbgs() << "Falling back to SDAG: no NEON/FP support\n");
538     return true;
539   }
540 
541   SMEAttrs Attrs(F);
542   if (Attrs.hasNewZAInterface() ||
543       (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody()))
544     return true;
545 
546   return false;
547 }
548 
549 bool AArch64CallLowering::lowerFormalArguments(
550     MachineIRBuilder &MIRBuilder, const Function &F,
551     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
552   MachineFunction &MF = MIRBuilder.getMF();
553   MachineBasicBlock &MBB = MIRBuilder.getMBB();
554   MachineRegisterInfo &MRI = MF.getRegInfo();
555   auto &DL = F.getParent()->getDataLayout();
556 
557   SmallVector<ArgInfo, 8> SplitArgs;
558   SmallVector<std::pair<Register, Register>> BoolArgs;
559 
560   // Insert the hidden sret parameter if the return value won't fit in the
561   // return registers.
562   if (!FLI.CanLowerReturn)
563     insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
564 
565   unsigned i = 0;
566   for (auto &Arg : F.args()) {
567     if (DL.getTypeStoreSize(Arg.getType()).isZero())
568       continue;
569 
570     ArgInfo OrigArg{VRegs[i], Arg, i};
571     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
572 
573     // i1 arguments are zero-extended to i8 by the caller. Emit a
574     // hint to reflect this.
575     if (OrigArg.Ty->isIntegerTy(1)) {
576       assert(OrigArg.Regs.size() == 1 &&
577              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
578              "Unexpected registers used for i1 arg");
579 
580       auto &Flags = OrigArg.Flags[0];
581       if (!Flags.isZExt() && !Flags.isSExt()) {
582         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
583         Register OrigReg = OrigArg.Regs[0];
584         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
585         OrigArg.Regs[0] = WideReg;
586         BoolArgs.push_back({OrigReg, WideReg});
587       }
588     }
589 
590     if (Arg.hasAttribute(Attribute::SwiftAsync))
591       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
592 
593     splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
594     ++i;
595   }
596 
597   if (!MBB.empty())
598     MIRBuilder.setInstr(*MBB.begin());
599 
600   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
601   CCAssignFn *AssignFn =
602       TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
603 
604   AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
605   FormalArgHandler Handler(MIRBuilder, MRI);
606   if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
607                                      F.getCallingConv(), F.isVarArg()))
608     return false;
609 
610   if (!BoolArgs.empty()) {
611     for (auto &KV : BoolArgs) {
612       Register OrigReg = KV.first;
613       Register WideReg = KV.second;
614       LLT WideTy = MRI.getType(WideReg);
615       assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
616              "Unexpected bit size of a bool arg");
617       MIRBuilder.buildTrunc(
618           OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
619     }
620   }
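  // The fixup above produces, for an i1 parameter without zeroext/signext
  // (a sketch; the surrounding copies depend on the assigned location):
  //   %z:_(s8) = G_ASSERT_ZEXT %w(s8), 1
  //   %b:_(s1) = G_TRUNC %z(s8)
  // where %w is the widened i8 value written by the argument handler.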
621 
622   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
623   uint64_t StackOffset = Assigner.StackOffset;
624   if (F.isVarArg()) {
625     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
626     if (!Subtarget.isTargetDarwin()) {
627       // FIXME: we need to reimplement saveVarArgsRegisters from
628       // AArch64ISelLowering.
629       return false;
630     }
631 
632     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
633     StackOffset =
634         alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);
635 
636     auto &MFI = MIRBuilder.getMF().getFrameInfo();
637     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
638   }
639 
640   if (doesCalleeRestoreStack(F.getCallingConv(),
641                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
642     // We have a non-standard ABI, so why not make full use of the stack that
643     // we're going to pop? It must be aligned to 16 B in any case.
644     StackOffset = alignTo(StackOffset, 16);
645 
646     // If we're expected to restore the stack (e.g. fastcc), then we'll be
647     // adding a multiple of 16.
648     FuncInfo->setArgumentStackToRestore(StackOffset);
649 
650     // Our own callers will guarantee that the space is free by giving an
651     // aligned value to CALLSEQ_START.
652   }
653 
654   // When we tail call, we need to check if the callee's arguments
655   // will fit on the caller's stack. So, whenever we lower formal arguments,
656   // we should keep track of this information, since we might lower a tail call
657   // in this function later.
658   FuncInfo->setBytesInStackArgArea(StackOffset);
659 
660   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
661   if (Subtarget.hasCustomCallingConv())
662     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
663 
664   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
665 
666   // Move back to the end of the basic block.
667   MIRBuilder.setMBB(MBB);
668 
669   return true;
670 }
671 
672 /// Return true if the calling convention is one that we can guarantee TCO for.
673 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
674   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
675          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
676 }
677 
678 /// Return true if we might ever do TCO for calls with this calling convention.
679 static bool mayTailCallThisCC(CallingConv::ID CC) {
680   switch (CC) {
681   case CallingConv::C:
682   case CallingConv::PreserveMost:
683   case CallingConv::Swift:
684   case CallingConv::SwiftTail:
685   case CallingConv::Tail:
686   case CallingConv::Fast:
687     return true;
688   default:
689     return false;
690   }
691 }
692 
693 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
694 /// CC.
695 static std::pair<CCAssignFn *, CCAssignFn *>
696 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
697   return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
698 }
699 
700 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
701     CallLoweringInfo &Info, MachineFunction &MF,
702     SmallVectorImpl<ArgInfo> &InArgs) const {
703   const Function &CallerF = MF.getFunction();
704   CallingConv::ID CalleeCC = Info.CallConv;
705   CallingConv::ID CallerCC = CallerF.getCallingConv();
706 
707   // If the calling conventions match, then everything must be the same.
708   if (CalleeCC == CallerCC)
709     return true;
710 
711   // Check if the caller and callee will handle arguments in the same way.
712   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
713   CCAssignFn *CalleeAssignFnFixed;
714   CCAssignFn *CalleeAssignFnVarArg;
715   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
716       getAssignFnsForCC(CalleeCC, TLI);
717 
718   CCAssignFn *CallerAssignFnFixed;
719   CCAssignFn *CallerAssignFnVarArg;
720   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
721       getAssignFnsForCC(CallerCC, TLI);
722 
723   AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
724                                               CalleeAssignFnVarArg);
725   AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
726                                               CallerAssignFnVarArg);
727 
728   if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
729     return false;
730 
731   // Make sure that the caller and callee preserve all of the same registers.
732   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
733   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
734   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
735   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
736     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
737     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
738   }
739 
740   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
741 }
742 
743 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
744     CallLoweringInfo &Info, MachineFunction &MF,
745     SmallVectorImpl<ArgInfo> &OutArgs) const {
746   // If there are no outgoing arguments, then we are done.
747   if (OutArgs.empty())
748     return true;
749 
750   const Function &CallerF = MF.getFunction();
751   LLVMContext &Ctx = CallerF.getContext();
752   CallingConv::ID CalleeCC = Info.CallConv;
753   CallingConv::ID CallerCC = CallerF.getCallingConv();
754   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
755   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
756 
757   CCAssignFn *AssignFnFixed;
758   CCAssignFn *AssignFnVarArg;
759   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
760 
761   // We have outgoing arguments. Make sure that we can tail call with them.
762   SmallVector<CCValAssign, 16> OutLocs;
763   CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
764 
765   AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
766                                               Subtarget, /*IsReturn*/ false);
767   if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
768     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
769     return false;
770   }
771 
772   // Make sure that they can fit on the caller's stack.
773   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
774   if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
775     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
776     return false;
777   }
778 
779   // Verify that the parameters in callee-saved registers match.
780   // TODO: Port this over to CallLowering as general code once swiftself is
781   // supported.
782   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
783   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
784   MachineRegisterInfo &MRI = MF.getRegInfo();
785 
786   if (Info.IsVarArg) {
787     // Be conservative and disallow variadic memory operands to match SDAG's
788     // behaviour.
789     // FIXME: If the caller's calling convention is C, then we can
790     // potentially use its argument area. However, for cases like fastcc,
791     // we can't do anything.
792     for (unsigned i = 0; i < OutLocs.size(); ++i) {
793       auto &ArgLoc = OutLocs[i];
794       if (ArgLoc.isRegLoc())
795         continue;
796 
797       LLVM_DEBUG(
798           dbgs()
799           << "... Cannot tail call vararg function with stack arguments\n");
800       return false;
801     }
802   }
803 
804   return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
805 }
806 
807 bool AArch64CallLowering::isEligibleForTailCallOptimization(
808     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
809     SmallVectorImpl<ArgInfo> &InArgs,
810     SmallVectorImpl<ArgInfo> &OutArgs) const {
811 
812   // Must pass all target-independent checks in order to tail call optimize.
813   if (!Info.IsTailCall)
814     return false;
815 
816   CallingConv::ID CalleeCC = Info.CallConv;
817   MachineFunction &MF = MIRBuilder.getMF();
818   const Function &CallerF = MF.getFunction();
819 
820   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
821 
822   if (Info.SwiftErrorVReg) {
823     // TODO: We should handle this.
824     // Note that this is also handled by the check for no outgoing arguments.
825     // Proactively disabling this though, because the swifterror handling in
826     // lowerCall inserts a COPY *after* the location of the call.
827     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
828     return false;
829   }
830 
831   if (!mayTailCallThisCC(CalleeCC)) {
832     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
833     return false;
834   }
835 
836   // Byval parameters hand the function a pointer directly into the stack area
837   // we want to reuse during a tail call. Working around this *is* possible (see
838   // X86).
839   //
840   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
841   // it?
842   //
843   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
844   // In this case, it is necessary to save/restore X0 in the callee. Tail
845   // call opt interferes with this. So we disable tail call opt when the
846   // caller has an argument with "inreg" attribute.
847   //
848   // FIXME: Check whether the callee also has an "inreg" argument.
849   //
850   // When the caller has a swifterror argument, we don't want to tail call
851   // because we would have to move the value into the swifterror register
852   // before the tail call.
853   if (any_of(CallerF.args(), [](const Argument &A) {
854         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
855       })) {
856     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
857                          "inreg, or swifterror arguments\n");
858     return false;
859   }
860 
861   // Externally-defined functions with weak linkage should not be
862   // tail-called on AArch64 when the OS does not support dynamic
863   // pre-emption of symbols, as the AAELF spec requires normal calls
864   // to undefined weak functions to be replaced with a NOP or jump to the
865   // next instruction. The behaviour of branch instructions in this
866   // situation (as used for tail calls) is implementation-defined, so we
867   // cannot rely on the linker replacing the tail call with a return.
868   if (Info.Callee.isGlobal()) {
869     const GlobalValue *GV = Info.Callee.getGlobal();
870     const Triple &TT = MF.getTarget().getTargetTriple();
871     if (GV->hasExternalWeakLinkage() &&
872         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
873          TT.isOSBinFormatMachO())) {
874       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
875                            "with weak linkage for this OS.\n");
876       return false;
877     }
878   }
879 
880   // If we have -tailcallopt, then we're done.
881   if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
882     return CalleeCC == CallerF.getCallingConv();
883 
884   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
885   // Try to find cases where we can do that.
886 
887   // I want anyone implementing a new calling convention to think long and hard
888   // about this assert.
889   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
890          "Unexpected variadic calling convention");
891 
892   // Verify that the incoming and outgoing arguments from the callee are
893   // safe to tail call.
894   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
895     LLVM_DEBUG(
896         dbgs()
897         << "... Caller and callee have incompatible calling conventions.\n");
898     return false;
899   }
900 
901   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
902     return false;
903 
904   LLVM_DEBUG(
905       dbgs() << "... Call is eligible for tail call optimization.\n");
906   return true;
907 }
908 
909 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
910                               bool IsTailCall) {
911   if (!IsTailCall)
912     return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
913 
914   if (!IsIndirect)
915     return AArch64::TCRETURNdi;
916 
917   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
918   // x16 or x17.
919   if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
920     return AArch64::TCRETURNriBTI;
921 
922   return AArch64::TCRETURNri;
923 }
924 
925 static const uint32_t *
926 getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
927                AArch64CallLowering::CallLoweringInfo &Info,
928                const AArch64RegisterInfo &TRI, MachineFunction &MF) {
929   const uint32_t *Mask;
930   if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
931     // For 'this' returns, use the X0-preserving mask if applicable
932     Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
933     if (!Mask) {
934       OutArgs[0].Flags[0].setReturned(false);
935       Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
936     }
937   } else {
938     Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
939   }
940   return Mask;
941 }
942 
943 bool AArch64CallLowering::lowerTailCall(
944     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
945     SmallVectorImpl<ArgInfo> &OutArgs) const {
946   MachineFunction &MF = MIRBuilder.getMF();
947   const Function &F = MF.getFunction();
948   MachineRegisterInfo &MRI = MF.getRegInfo();
949   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
950   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
951 
952   // True when we're tail calling, but without -tailcallopt.
953   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
954                    Info.CallConv != CallingConv::Tail &&
955                    Info.CallConv != CallingConv::SwiftTail;
956 
957   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
958   // register class. Until we can do that, we should fall back here.
959   if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
960     LLVM_DEBUG(
961         dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
962     return false;
963   }
964 
965   // Find out which ABI gets to decide where things go.
966   CallingConv::ID CalleeCC = Info.CallConv;
967   CCAssignFn *AssignFnFixed;
968   CCAssignFn *AssignFnVarArg;
969   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
970 
971   MachineInstrBuilder CallSeqStart;
972   if (!IsSibCall)
973     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
974 
975   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
976   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
977   MIB.add(Info.Callee);
978 
979   // Byte offset for the tail call. When we are sibcalling, this will always
980   // be 0.
981   MIB.addImm(0);
982 
983   // Tell the call which registers are clobbered.
984   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
985   auto TRI = Subtarget.getRegisterInfo();
986   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
987   if (Subtarget.hasCustomCallingConv())
988     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
989   MIB.addRegMask(Mask);
990 
991   if (Info.CFIType)
992     MIB->setCFIType(MF, Info.CFIType->getZExtValue());
993 
994   if (TRI->isAnyArgRegReserved(MF))
995     TRI->emitReservedArgRegCallError(MF);
996 
997   // FPDiff is the byte offset of the call's argument area from the callee's.
998   // Stores to callee stack arguments will be placed in FixedStackSlots offset
999   // by this amount for a tail call. In a sibling call it must be 0 because the
1000   // caller will deallocate the entire stack and the callee still expects its
1001   // arguments to begin at SP+0.
1002   int FPDiff = 0;
1003 
1004   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1005   // by -tailcallopt. For sibcalls, the memory operands for the call are
1006   // already available in the caller's incoming argument space.
1007   unsigned NumBytes = 0;
1008   if (!IsSibCall) {
1009     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1010     // before handling assignments, because FPDiff must be known for memory
1011     // arguments.
1012     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1013     SmallVector<CCValAssign, 16> OutLocs;
1014     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1015 
1016     AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1017                                                 Subtarget, /*IsReturn*/ false);
1018     if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1019       return false;
1020 
1021     // The callee will pop the argument stack as a tail call. Thus, we must
1022     // keep it 16-byte aligned.
1023     NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
1024 
1025     // FPDiff will be negative if this tail call requires more space than we
1026     // would automatically have in our incoming argument space. Positive if we
1027     // actually shrink the stack.
1028     FPDiff = NumReusableBytes - NumBytes;
1029 
1030     // Update the required reserved area if this is the tail call requiring the
1031     // most argument stack space.
1032     if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1033       FuncInfo->setTailCallReservedStack(-FPDiff);
1034 
1035     // The stack pointer must be 16-byte aligned at all times it's used for a
1036     // memory operation, which in practice means at *all* times and in
1037     // particular across call boundaries. Therefore our own arguments started at
1038     // a 16-byte aligned SP and the delta applied for the tail call should
1039     // satisfy the same constraint.
1040     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1041   }
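  // Worked example (numbers illustrative): if the caller reserved 32 bytes for
  // its own incoming arguments (NumReusableBytes == 32) and this callee needs
  // 48 bytes of argument stack (NumBytes == 48), then FPDiff == -16, callee
  // stack stores are offset by that amount, and TailCallReservedStack is
  // raised to at least 16.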
1042 
1043   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1044 
1045   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1046                                         Subtarget, /*IsReturn*/ false);
1047 
1048   // Do the actual argument marshalling.
1049   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1050                              /*IsTailCall*/ true, FPDiff);
1051   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1052                                      CalleeCC, Info.IsVarArg))
1053     return false;
1054 
1055   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1056 
1057   if (Info.IsVarArg && Info.IsMustTailCall) {
1058     // Now we know what's being passed to the function. Add uses to the call for
1059     // the forwarded registers that we *aren't* passing as parameters. This will
1060     // preserve the copies we build earlier.
1061     // preserve the copies we built earlier.
1062       Register ForwardedReg = F.PReg;
1063       // If the register is already passed, or aliases a register which is
1064       // already being passed, then skip it.
1065       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
1066             if (!Use.isReg())
1067               return false;
1068             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
1069           }))
1070         continue;
1071 
1072       // We aren't passing it already, so we should add it to the call.
1073       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
1074       MIB.addReg(ForwardedReg, RegState::Implicit);
1075     }
1076   }
1077 
1078   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1079   // sequence start and end here.
1080   if (!IsSibCall) {
1081     MIB->getOperand(1).setImm(FPDiff);
1082     CallSeqStart.addImm(0).addImm(0);
1083     // End the call sequence *before* emitting the call. Normally, we would
1084     // tidy the frame up after the call. However, here, we've laid out the
1085     // parameters so that when SP is reset, they will be in the correct
1086     // location.
1087     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
1088   }
1089 
1090   // Now we can add the actual call instruction to the correct basic block.
1091   MIRBuilder.insertInstr(MIB);
1092 
1093   // If Callee is a reg, since it is used by a target specific instruction,
1094   // it must have a register class matching the constraint of that instruction.
1095   if (MIB->getOperand(0).isReg())
1096     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1097                              *MF.getSubtarget().getRegBankInfo(), *MIB,
1098                              MIB->getDesc(), MIB->getOperand(0), 0);
1099 
1100   MF.getFrameInfo().setHasTailCall();
1101   Info.LoweredTailCall = true;
1102   return true;
1103 }
1104 
1105 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1106                                     CallLoweringInfo &Info) const {
1107   MachineFunction &MF = MIRBuilder.getMF();
1108   const Function &F = MF.getFunction();
1109   MachineRegisterInfo &MRI = MF.getRegInfo();
1110   auto &DL = F.getParent()->getDataLayout();
1111   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1112   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1113 
1114   // Arm64EC has extra requirements for varargs calls; bail out for now.
1115   if (Info.IsVarArg && Subtarget.isWindowsArm64EC())
1116     return false;
1117 
1118   SmallVector<ArgInfo, 8> OutArgs;
1119   for (auto &OrigArg : Info.OrigArgs) {
1120     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1121     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1122     auto &Flags = OrigArg.Flags[0];
1123     if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
1124       ArgInfo &OutArg = OutArgs.back();
1125       assert(OutArg.Regs.size() == 1 &&
1126              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1127              "Unexpected registers used for i1 arg");
1128 
1129       // We cannot use a ZExt ArgInfo flag here, because it will
1130       // zero-extend the argument to i32 instead of just i8.
1131       OutArg.Regs[0] =
1132           MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
1133       LLVMContext &Ctx = MF.getFunction().getContext();
1134       OutArg.Ty = Type::getInt8Ty(Ctx);
1135     }
1136   }
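  // Sketch of the i1 fixup above (types illustrative): a bare i1 argument is
  // widened explicitly before assignment,
  //   %z:_(s8) = G_ZEXT %flag(s1)
  // and the corresponding ArgInfo is retyped to i8 so the CC sees an i8 value.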
1137 
1138   SmallVector<ArgInfo, 8> InArgs;
1139   if (!Info.OrigRet.Ty->isVoidTy())
1140     splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1141 
1142   // If we can lower as a tail call, do that instead.
1143   bool CanTailCallOpt =
1144       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1145 
1146   // We must emit a tail call if we have musttail.
1147   if (Info.IsMustTailCall && !CanTailCallOpt) {
1148     // There are types of incoming/outgoing arguments we can't handle yet, so
1149     // it doesn't make sense to actually die here like in ISelLowering. Instead,
1150     // fall back to SelectionDAG and let it try to handle this.
1151     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1152     return false;
1153   }
1154 
1155   Info.IsTailCall = CanTailCallOpt;
1156   if (CanTailCallOpt)
1157     return lowerTailCall(MIRBuilder, Info, OutArgs);
1158 
1159   // Find out which ABI gets to decide where things go.
1160   CCAssignFn *AssignFnFixed;
1161   CCAssignFn *AssignFnVarArg;
1162   std::tie(AssignFnFixed, AssignFnVarArg) =
1163       getAssignFnsForCC(Info.CallConv, TLI);
1164 
1165   MachineInstrBuilder CallSeqStart;
1166   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1167 
1168   // Create a temporarily-floating call instruction so we can add the implicit
1169   // uses of arg registers.
1170 
1171   unsigned Opc = 0;
1172   // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1173   // be expanded to the call, directly followed by a special marker sequence and
1174   // a call to an ObjC library function.
1175   if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
1176     Opc = AArch64::BLR_RVMARKER;
1177   // A call to a returns twice function like setjmp must be followed by a bti
1178   // instruction.
1179   else if (Info.CB &&
1180            Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
1181            !Subtarget.noBTIAtReturnTwice() &&
1182            MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1183     Opc = AArch64::BLR_BTI;
1184   else
1185     Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
1186 
1187   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1188   unsigned CalleeOpNo = 0;
1189 
1190   if (Opc == AArch64::BLR_RVMARKER) {
1191     // Add a target global address for the retainRV/claimRV runtime function
1192     // just before the call target.
1193     Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
1194     MIB.addGlobalAddress(ARCFn);
1195     ++CalleeOpNo;
1196   } else if (Info.CFIType) {
1197     MIB->setCFIType(MF, Info.CFIType->getZExtValue());
1198   }
1199 
1200   MIB.add(Info.Callee);
1201 
1202   // Tell the call which registers are clobbered.
1203   const uint32_t *Mask;
1204   const auto *TRI = Subtarget.getRegisterInfo();
1205 
1206   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1207                                         Subtarget, /*IsReturn*/ false);
1208   // Do the actual argument marshalling.
1209   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false);
1210   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1211                                      Info.CallConv, Info.IsVarArg))
1212     return false;
1213 
1214   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1215 
1216   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1217     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1218   MIB.addRegMask(Mask);
1219 
1220   if (TRI->isAnyArgRegReserved(MF))
1221     TRI->emitReservedArgRegCallError(MF);
1222 
1223   // Now we can add the actual call instruction to the correct basic block.
1224   MIRBuilder.insertInstr(MIB);
1225 
1226   // If Callee is a reg, since it is used by a target specific
1227   // instruction, it must have a register class matching the
1228   // constraint of that instruction.
1229   if (MIB->getOperand(CalleeOpNo).isReg())
1230     constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
1231                              *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
1232                              MIB->getOperand(CalleeOpNo), CalleeOpNo);
1233 
1234   // Finally we can copy the returned value back into its virtual-register. In
1235   // symmetry with the arguments, the physical register must be an
1236   // implicit-define of the call instruction.
1237   if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1238     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
1239     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1240     bool UsingReturnedArg =
1241         !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1242 
1243     AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1244                                           /*IsReturn*/ false);
1245     ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1246     if (!determineAndHandleAssignments(
1247             UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
1248             MIRBuilder, Info.CallConv, Info.IsVarArg,
1249             UsingReturnedArg ? ArrayRef(OutArgs[0].Regs) : std::nullopt))
1250       return false;
1251   }
1252 
1253   if (Info.SwiftErrorVReg) {
1254     MIB.addDef(AArch64::X21, RegState::Implicit);
1255     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
1256   }
1257 
1258   uint64_t CalleePopBytes =
1259       doesCalleeRestoreStack(Info.CallConv,
1260                              MF.getTarget().Options.GuaranteedTailCallOpt)
1261           ? alignTo(Assigner.StackOffset, 16)
1262           : 0;
1263 
1264   CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
1265   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1266       .addImm(Assigner.StackOffset)
1267       .addImm(CalleePopBytes);
1268 
1269   if (!Info.CanLowerReturn) {
1270     insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1271                     Info.DemoteRegister, Info.DemoteStackIndex);
1272   }
1273   return true;
1274 }
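// End-to-end sketch (registers and immediates assumed, not generated output)
// of a simple non-tail call such as "%r = call i32 @g(i32 %a)":
//   ADJCALLSTACKDOWN 0, 0
//   $w0 = COPY %a(s32)
//   BL @g, <regmask>, implicit $w0, implicit-def $w0
//   %r:_(s32) = COPY $w0
//   ADJCALLSTACKUP 0, 0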
1275 
1276 bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1277   return Ty.getSizeInBits() == 64;
1278 }
1279