xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64CallLowering.h"
16 #include "AArch64GlobalISelUtils.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64RegisterInfo.h"
20 #include "AArch64Subtarget.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/Analysis/ObjCARCUtil.h"
24 #include "llvm/CodeGen/Analysis.h"
25 #include "llvm/CodeGen/CallingConvLower.h"
26 #include "llvm/CodeGen/FunctionLoweringInfo.h"
27 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
28 #include "llvm/CodeGen/GlobalISel/Utils.h"
29 #include "llvm/CodeGen/LowLevelTypeUtils.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineFrameInfo.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/MachineOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/TargetRegisterInfo.h"
38 #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/CodeGenTypes/MachineValueType.h"
41 #include "llvm/IR/Argument.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <cstdint>
49 #include <iterator>
50 
51 #define DEBUG_TYPE "aarch64-call-lowering"
52 
53 using namespace llvm;
54 using namespace AArch64GISelUtils;
55 
56 extern cl::opt<bool> EnableSVEGISel;
57 
58 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
59   : CallLowering(&TLI) {}
60 
61 static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
62                                              MVT &LocVT) {
63   // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
64   // hack because the DAG calls the assignment function with pre-legalized
65   // register typed values, not the raw type.
66   //
67   // This hack is not applied to return values which are not passed on the
68   // stack.
69   if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
70     ValVT = LocVT = MVT::i8;
71   else if (OrigVT == MVT::i16)
72     ValVT = LocVT = MVT::i16;
73 }
74 
75 // Account for i1/i8/i16 stack passed value hack
76 static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
77   const MVT ValVT = VA.getValVT();
78   return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
79                                                  : LLT(VA.getLocVT());
80 }
81 
82 namespace {
83 
84 struct AArch64IncomingValueAssigner
85     : public CallLowering::IncomingValueAssigner {
86   AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
87                                CCAssignFn *AssignFnVarArg_)
88       : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
89 
90   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
91                  CCValAssign::LocInfo LocInfo,
92                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
93                  CCState &State) override {
94     applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
95     return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
96                                             LocInfo, Info, Flags, State);
97   }
98 };
99 
100 struct AArch64OutgoingValueAssigner
101     : public CallLowering::OutgoingValueAssigner {
102   const AArch64Subtarget &Subtarget;
103 
104   /// Track if this is used for a return instead of function argument
105   /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
106   /// stack passed returns for them and cannot apply the type adjustment.
107   bool IsReturn;
108 
109   AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
110                                CCAssignFn *AssignFnVarArg_,
111                                const AArch64Subtarget &Subtarget_,
112                                bool IsReturn)
113       : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
114         Subtarget(Subtarget_), IsReturn(IsReturn) {}
115 
116   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
117                  CCValAssign::LocInfo LocInfo,
118                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
119                  CCState &State) override {
120     const Function &F = State.getMachineFunction().getFunction();
121     bool IsCalleeWin =
122         Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
123     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
124 
125     bool Res;
126     if (Info.IsFixed && !UseVarArgsCCForFixed) {
127       if (!IsReturn)
128         applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
129       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
130     } else
131       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
132 
133     StackSize = State.getStackSize();
134     return Res;
135   }
136 };
137 
138 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
139   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
140       : IncomingValueHandler(MIRBuilder, MRI) {}
141 
142   Register getStackAddress(uint64_t Size, int64_t Offset,
143                            MachinePointerInfo &MPO,
144                            ISD::ArgFlagsTy Flags) override {
145     auto &MFI = MIRBuilder.getMF().getFrameInfo();
146 
147     // Byval is assumed to be writable memory, but other stack passed arguments
148     // are not.
149     const bool IsImmutable = !Flags.isByVal();
150 
151     int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
152     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
153     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
154     return AddrReg.getReg(0);
155   }
156 
157   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
158                              ISD::ArgFlagsTy Flags) const override {
159     // For pointers, we just need to fixup the integer types reported in the
160     // CCValAssign.
161     if (Flags.isPointer())
162       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
163     return getStackValueStoreTypeHack(VA);
164   }
165 
166   void assignValueToReg(Register ValVReg, Register PhysReg,
167                         const CCValAssign &VA) override {
168     markPhysRegUsed(PhysReg);
169     IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
170   }
171 
172   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
173                             const MachinePointerInfo &MPO,
174                             const CCValAssign &VA) override {
175     MachineFunction &MF = MIRBuilder.getMF();
176 
177     LLT ValTy(VA.getValVT());
178     LLT LocTy(VA.getLocVT());
179 
180     // Fixup the types for the DAG compatibility hack.
181     if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
182       std::swap(ValTy, LocTy);
183     else {
184       // The calling code knows if this is a pointer or not, we're only touching
185       // the LocTy for the i8/i16 hack.
186       assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
187       LocTy = MemTy;
188     }
189 
190     auto MMO = MF.getMachineMemOperand(
191         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
192         inferAlignFromPtrInfo(MF, MPO));
193 
194     switch (VA.getLocInfo()) {
195     case CCValAssign::LocInfo::ZExt:
196       MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
197       return;
198     case CCValAssign::LocInfo::SExt:
199       MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
200       return;
201     default:
202       MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
203       return;
204     }
205   }
206 
207   /// How the physical register gets marked varies between formal
208   /// parameters (it's a basic-block live-in), and a call instruction
209   /// (it's an implicit-def of the BL).
210   virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
211 };
212 
213 struct FormalArgHandler : public IncomingArgHandler {
214   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
215       : IncomingArgHandler(MIRBuilder, MRI) {}
216 
217   void markPhysRegUsed(MCRegister PhysReg) override {
218     MIRBuilder.getMRI()->addLiveIn(PhysReg);
219     MIRBuilder.getMBB().addLiveIn(PhysReg);
220   }
221 };
222 
223 struct CallReturnHandler : public IncomingArgHandler {
224   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
225                     MachineInstrBuilder MIB)
226       : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
227 
228   void markPhysRegUsed(MCRegister PhysReg) override {
229     MIB.addDef(PhysReg, RegState::Implicit);
230   }
231 
232   MachineInstrBuilder MIB;
233 };
234 
235 /// A special return arg handler for "returned" attribute arg calls.
236 struct ReturnedArgCallReturnHandler : public CallReturnHandler {
237   ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
238                                MachineRegisterInfo &MRI,
239                                MachineInstrBuilder MIB)
240       : CallReturnHandler(MIRBuilder, MRI, MIB) {}
241 
242   void markPhysRegUsed(MCRegister PhysReg) override {}
243 };
244 
245 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
246   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
247                      MachineInstrBuilder MIB, bool IsTailCall = false,
248                      int FPDiff = 0)
249       : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
250         FPDiff(FPDiff),
251         Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
252 
253   Register getStackAddress(uint64_t Size, int64_t Offset,
254                            MachinePointerInfo &MPO,
255                            ISD::ArgFlagsTy Flags) override {
256     MachineFunction &MF = MIRBuilder.getMF();
257     LLT p0 = LLT::pointer(0, 64);
258     LLT s64 = LLT::scalar(64);
259 
260     if (IsTailCall) {
261       assert(!Flags.isByVal() && "byval unhandled with tail calls");
262 
263       Offset += FPDiff;
264       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
265       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
266       MPO = MachinePointerInfo::getFixedStack(MF, FI);
267       return FIReg.getReg(0);
268     }
269 
270     if (!SPReg)
271       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
272 
273     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
274 
275     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
276 
277     MPO = MachinePointerInfo::getStack(MF, Offset);
278     return AddrReg.getReg(0);
279   }
280 
281   /// We need to fixup the reported store size for certain value types because
282   /// we invert the interpretation of ValVT and LocVT in certain cases. This is
283   /// for compatability with the DAG call lowering implementation, which we're
284   /// currently building on top of.
285   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
286                              ISD::ArgFlagsTy Flags) const override {
287     if (Flags.isPointer())
288       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
289     return getStackValueStoreTypeHack(VA);
290   }
291 
292   void assignValueToReg(Register ValVReg, Register PhysReg,
293                         const CCValAssign &VA) override {
294     MIB.addUse(PhysReg, RegState::Implicit);
295     Register ExtReg = extendRegister(ValVReg, VA);
296     MIRBuilder.buildCopy(PhysReg, ExtReg);
297   }
298 
299   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
300                             const MachinePointerInfo &MPO,
301                             const CCValAssign &VA) override {
302     MachineFunction &MF = MIRBuilder.getMF();
303     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
304                                        inferAlignFromPtrInfo(MF, MPO));
305     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
306   }
307 
308   void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
309                             Register Addr, LLT MemTy,
310                             const MachinePointerInfo &MPO,
311                             const CCValAssign &VA) override {
312     unsigned MaxSize = MemTy.getSizeInBytes() * 8;
313     // For varargs, we always want to extend them to 8 bytes, in which case
314     // we disable setting a max.
315     if (!Arg.IsFixed)
316       MaxSize = 0;
317 
318     Register ValVReg = Arg.Regs[RegIndex];
319     if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
320       MVT LocVT = VA.getLocVT();
321       MVT ValVT = VA.getValVT();
322 
323       if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
324         std::swap(ValVT, LocVT);
325         MemTy = LLT(VA.getValVT());
326       }
327 
328       ValVReg = extendRegister(ValVReg, VA, MaxSize);
329     } else {
330       // The store does not cover the full allocated stack slot.
331       MemTy = LLT(VA.getValVT());
332     }
333 
334     assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
335   }
336 
337   MachineInstrBuilder MIB;
338 
339   bool IsTailCall;
340 
341   /// For tail calls, the byte offset of the call's argument area from the
342   /// callee's. Unused elsewhere.
343   int FPDiff;
344 
345   // Cache the SP register vreg if we need it more than once in this call site.
346   Register SPReg;
347 
348   const AArch64Subtarget &Subtarget;
349 };
350 } // namespace
351 
352 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
353   return (CallConv == CallingConv::Fast && TailCallOpt) ||
354          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
355 }
356 
357 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
358                                       const Value *Val,
359                                       ArrayRef<Register> VRegs,
360                                       FunctionLoweringInfo &FLI,
361                                       Register SwiftErrorVReg) const {
362   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
363   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
364          "Return value without a vreg");
365 
366   bool Success = true;
367   if (!FLI.CanLowerReturn) {
368     insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
369   } else if (!VRegs.empty()) {
370     MachineFunction &MF = MIRBuilder.getMF();
371     const Function &F = MF.getFunction();
372     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
373 
374     MachineRegisterInfo &MRI = MF.getRegInfo();
375     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
376     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
377     auto &DL = F.getDataLayout();
378     LLVMContext &Ctx = Val->getType()->getContext();
379 
380     SmallVector<EVT, 4> SplitEVTs;
381     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
382     assert(VRegs.size() == SplitEVTs.size() &&
383            "For each split Type there should be exactly one VReg.");
384 
385     SmallVector<ArgInfo, 8> SplitArgs;
386     CallingConv::ID CC = F.getCallingConv();
387 
388     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
389       Register CurVReg = VRegs[i];
390       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
391       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
392 
393       // i1 is a special case because SDAG i1 true is naturally zero extended
394       // when widened using ANYEXT. We need to do it explicitly here.
395       auto &Flags = CurArgInfo.Flags[0];
396       if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
397           !Flags.isZExt()) {
398         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
399       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
400                  1) {
401         // Some types will need extending as specified by the CC.
402         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
403         if (EVT(NewVT) != SplitEVTs[i]) {
404           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
405           if (F.getAttributes().hasRetAttr(Attribute::SExt))
406             ExtendOp = TargetOpcode::G_SEXT;
407           else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
408             ExtendOp = TargetOpcode::G_ZEXT;
409 
410           LLT NewLLT(NewVT);
411           LLT OldLLT = getLLTForType(*CurArgInfo.Ty, DL);
412           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
413           // Instead of an extend, we might have a vector type which needs
414           // padding with more elements, e.g. <2 x half> -> <4 x half>.
415           if (NewVT.isVector()) {
416             if (OldLLT.isVector()) {
417               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
418                 CurVReg =
419                     MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
420                         .getReg(0);
421               } else {
422                 // Just do a vector extend.
423                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
424                               .getReg(0);
425               }
426             } else if (NewLLT.getNumElements() >= 2 &&
427                        NewLLT.getNumElements() <= 8) {
428               // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
429               // have <1 x S> vector types in GISel we use a build_vector
430               // instead of a vector merge/concat.
431               CurVReg =
432                   MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
433                       .getReg(0);
434             } else {
435               LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
436               return false;
437             }
438           } else {
439             // If the split EVT was a <1 x T> vector, and NewVT is T, then we
440             // don't have to do anything since we don't distinguish between the
441             // two.
442             if (NewLLT != MRI.getType(CurVReg)) {
443               // A scalar extend.
444               CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
445                             .getReg(0);
446             }
447           }
448         }
449       }
450       if (CurVReg != CurArgInfo.Regs[0]) {
451         CurArgInfo.Regs[0] = CurVReg;
452         // Reset the arg flags after modifying CurVReg.
453         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
454       }
455       splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
456     }
457 
458     AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
459                                           /*IsReturn*/ true);
460     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
461     Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
462                                             MIRBuilder, CC, F.isVarArg());
463   }
464 
465   if (SwiftErrorVReg) {
466     MIB.addUse(AArch64::X21, RegState::Implicit);
467     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
468   }
469 
470   MIRBuilder.insertInstr(MIB);
471   return Success;
472 }
473 
474 bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
475                                          CallingConv::ID CallConv,
476                                          SmallVectorImpl<BaseArgInfo> &Outs,
477                                          bool IsVarArg) const {
478   SmallVector<CCValAssign, 16> ArgLocs;
479   const auto &TLI = *getTLI<AArch64TargetLowering>();
480   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
481                  MF.getFunction().getContext());
482 
483   return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
484 }
485 
486 /// Helper function to compute forwarded registers for musttail calls. Computes
487 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
488 /// can be used to save + restore registers later.
489 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
490                                              CCAssignFn *AssignFn) {
491   MachineBasicBlock &MBB = MIRBuilder.getMBB();
492   MachineFunction &MF = MIRBuilder.getMF();
493   MachineFrameInfo &MFI = MF.getFrameInfo();
494 
495   if (!MFI.hasMustTailInVarArgFunc())
496     return;
497 
498   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
499   const Function &F = MF.getFunction();
500   assert(F.isVarArg() && "Expected F to be vararg?");
501 
502   // Compute the set of forwarded registers. The rest are scratch.
503   SmallVector<CCValAssign, 16> ArgLocs;
504   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
505                  F.getContext());
506   SmallVector<MVT, 2> RegParmTypes;
507   RegParmTypes.push_back(MVT::i64);
508   RegParmTypes.push_back(MVT::f128);
509 
510   // Later on, we can use this vector to restore the registers if necessary.
511   SmallVectorImpl<ForwardedRegister> &Forwards =
512       FuncInfo->getForwardedMustTailRegParms();
513   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
514 
515   // Conservatively forward X8, since it might be used for an aggregate
516   // return.
517   if (!CCInfo.isAllocated(AArch64::X8)) {
518     Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
519     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
520   }
521 
522   // Add the forwards to the MachineBasicBlock and MachineFunction.
523   for (const auto &F : Forwards) {
524     MBB.addLiveIn(F.PReg);
525     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
526   }
527 }
528 
529 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
530   auto &F = MF.getFunction();
531   if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
532                           llvm::any_of(F.args(), [](const Argument &A) {
533                             return A.getType()->isScalableTy();
534                           })))
535     return true;
536   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
537   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
538     LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
539     return true;
540   }
541 
542   SMEAttrs Attrs(F);
543   if (Attrs.hasZAState() || Attrs.hasZT0State() ||
544       Attrs.hasStreamingInterfaceOrBody() ||
545       Attrs.hasStreamingCompatibleInterface())
546     return true;
547 
548   return false;
549 }
550 
551 void AArch64CallLowering::saveVarArgRegisters(
552     MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
553     CCState &CCInfo) const {
554   auto GPRArgRegs = AArch64::getGPRArgRegs();
555   auto FPRArgRegs = AArch64::getFPRArgRegs();
556 
557   MachineFunction &MF = MIRBuilder.getMF();
558   MachineRegisterInfo &MRI = MF.getRegInfo();
559   MachineFrameInfo &MFI = MF.getFrameInfo();
560   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
561   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
562   bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(),
563                                                 MF.getFunction().isVarArg());
564   const LLT p0 = LLT::pointer(0, 64);
565   const LLT s64 = LLT::scalar(64);
566 
567   unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
568   unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
569 
570   unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
571   int GPRIdx = 0;
572   if (GPRSaveSize != 0) {
573     if (IsWin64CC) {
574       GPRIdx = MFI.CreateFixedObject(GPRSaveSize,
575                                      -static_cast<int>(GPRSaveSize), false);
576       if (GPRSaveSize & 15)
577         // The extra size here, if triggered, will always be 8.
578         MFI.CreateFixedObject(16 - (GPRSaveSize & 15),
579                               -static_cast<int>(alignTo(GPRSaveSize, 16)),
580                               false);
581     } else
582       GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
583 
584     auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx);
585     auto Offset =
586         MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8);
587 
588     for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
589       Register Val = MRI.createGenericVirtualRegister(s64);
590       Handler.assignValueToReg(
591           Val, GPRArgRegs[i],
592           CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64,
593                               GPRArgRegs[i], MVT::i64, CCValAssign::Full));
594       auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
595                                MF, GPRIdx, (i - FirstVariadicGPR) * 8)
596                          : MachinePointerInfo::getStack(MF, i * 8);
597       MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
598 
599       FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
600                                    FIN.getReg(0), Offset);
601     }
602   }
603   FuncInfo->setVarArgsGPRIndex(GPRIdx);
604   FuncInfo->setVarArgsGPRSize(GPRSaveSize);
605 
606   if (Subtarget.hasFPARMv8() && !IsWin64CC) {
607     unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
608 
609     unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
610     int FPRIdx = 0;
611     if (FPRSaveSize != 0) {
612       FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
613 
614       auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx);
615       auto Offset =
616           MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16);
617 
618       for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
619         Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128));
620         Handler.assignValueToReg(
621             Val, FPRArgRegs[i],
622             CCValAssign::getReg(
623                 i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
624                 MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full));
625 
626         auto MPO = MachinePointerInfo::getStack(MF, i * 16);
627         MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
628 
629         FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
630                                      FIN.getReg(0), Offset);
631       }
632     }
633     FuncInfo->setVarArgsFPRIndex(FPRIdx);
634     FuncInfo->setVarArgsFPRSize(FPRSaveSize);
635   }
636 }
637 
638 bool AArch64CallLowering::lowerFormalArguments(
639     MachineIRBuilder &MIRBuilder, const Function &F,
640     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
641   MachineFunction &MF = MIRBuilder.getMF();
642   MachineBasicBlock &MBB = MIRBuilder.getMBB();
643   MachineRegisterInfo &MRI = MF.getRegInfo();
644   auto &DL = F.getDataLayout();
645   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
646 
647   // Arm64EC has extra requirements for varargs calls which are only implemented
648   // in SelectionDAG; bail out for now.
649   if (F.isVarArg() && Subtarget.isWindowsArm64EC())
650     return false;
651 
652   // Arm64EC thunks have a special calling convention which is only implemented
653   // in SelectionDAG; bail out for now.
654   if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native ||
655       F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
656     return false;
657 
658   bool IsWin64 =
659       Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) &&
660       !Subtarget.isWindowsArm64EC();
661 
662   SmallVector<ArgInfo, 8> SplitArgs;
663   SmallVector<std::pair<Register, Register>> BoolArgs;
664 
665   // Insert the hidden sret parameter if the return value won't fit in the
666   // return registers.
667   if (!FLI.CanLowerReturn)
668     insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
669 
670   unsigned i = 0;
671   for (auto &Arg : F.args()) {
672     if (DL.getTypeStoreSize(Arg.getType()).isZero())
673       continue;
674 
675     ArgInfo OrigArg{VRegs[i], Arg, i};
676     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
677 
678     // i1 arguments are zero-extended to i8 by the caller. Emit a
679     // hint to reflect this.
680     if (OrigArg.Ty->isIntegerTy(1)) {
681       assert(OrigArg.Regs.size() == 1 &&
682              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
683              "Unexpected registers used for i1 arg");
684 
685       auto &Flags = OrigArg.Flags[0];
686       if (!Flags.isZExt() && !Flags.isSExt()) {
687         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
688         Register OrigReg = OrigArg.Regs[0];
689         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
690         OrigArg.Regs[0] = WideReg;
691         BoolArgs.push_back({OrigReg, WideReg});
692       }
693     }
694 
695     if (Arg.hasAttribute(Attribute::SwiftAsync))
696       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
697 
698     splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
699     ++i;
700   }
701 
702   if (!MBB.empty())
703     MIRBuilder.setInstr(*MBB.begin());
704 
705   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
706   CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
707 
708   AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
709   FormalArgHandler Handler(MIRBuilder, MRI);
710   SmallVector<CCValAssign, 16> ArgLocs;
711   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
712   if (!determineAssignments(Assigner, SplitArgs, CCInfo) ||
713       !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder))
714     return false;
715 
716   if (!BoolArgs.empty()) {
717     for (auto &KV : BoolArgs) {
718       Register OrigReg = KV.first;
719       Register WideReg = KV.second;
720       LLT WideTy = MRI.getType(WideReg);
721       assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
722              "Unexpected bit size of a bool arg");
723       MIRBuilder.buildTrunc(
724           OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
725     }
726   }
727 
728   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
729   uint64_t StackSize = Assigner.StackSize;
730   if (F.isVarArg()) {
731     if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
732       // The AAPCS variadic function ABI is identical to the non-variadic
733       // one. As a result there may be more arguments in registers and we should
734       // save them for future reference.
735       // Win64 variadic functions also pass arguments in registers, but all
736       // float arguments are passed in integer registers.
737       saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
738     } else if (Subtarget.isWindowsArm64EC()) {
739       return false;
740     }
741 
742     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
743     StackSize = alignTo(Assigner.StackSize, Subtarget.isTargetILP32() ? 4 : 8);
744 
745     auto &MFI = MIRBuilder.getMF().getFrameInfo();
746     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackSize, true));
747   }
748 
749   if (doesCalleeRestoreStack(F.getCallingConv(),
750                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
751     // We have a non-standard ABI, so why not make full use of the stack that
752     // we're going to pop? It must be aligned to 16 B in any case.
753     StackSize = alignTo(StackSize, 16);
754 
755     // If we're expected to restore the stack (e.g. fastcc), then we'll be
756     // adding a multiple of 16.
757     FuncInfo->setArgumentStackToRestore(StackSize);
758 
759     // Our own callers will guarantee that the space is free by giving an
760     // aligned value to CALLSEQ_START.
761   }
762 
763   // When we tail call, we need to check if the callee's arguments
764   // will fit on the caller's stack. So, whenever we lower formal arguments,
765   // we should keep track of this information, since we might lower a tail call
766   // in this function later.
767   FuncInfo->setBytesInStackArgArea(StackSize);
768 
769   if (Subtarget.hasCustomCallingConv())
770     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
771 
772   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
773 
774   // Move back to the end of the basic block.
775   MIRBuilder.setMBB(MBB);
776 
777   return true;
778 }
779 
780 /// Return true if the calling convention is one that we can guarantee TCO for.
781 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
782   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
783          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
784 }
785 
786 /// Return true if we might ever do TCO for calls with this calling convention.
787 static bool mayTailCallThisCC(CallingConv::ID CC) {
788   switch (CC) {
789   case CallingConv::C:
790   case CallingConv::PreserveMost:
791   case CallingConv::PreserveAll:
792   case CallingConv::PreserveNone:
793   case CallingConv::Swift:
794   case CallingConv::SwiftTail:
795   case CallingConv::Tail:
796   case CallingConv::Fast:
797     return true;
798   default:
799     return false;
800   }
801 }
802 
803 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
804 /// CC.
805 static std::pair<CCAssignFn *, CCAssignFn *>
806 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
807   return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
808 }
809 
810 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
811     CallLoweringInfo &Info, MachineFunction &MF,
812     SmallVectorImpl<ArgInfo> &InArgs) const {
813   const Function &CallerF = MF.getFunction();
814   CallingConv::ID CalleeCC = Info.CallConv;
815   CallingConv::ID CallerCC = CallerF.getCallingConv();
816 
817   // If the calling conventions match, then everything must be the same.
818   if (CalleeCC == CallerCC)
819     return true;
820 
821   // Check if the caller and callee will handle arguments in the same way.
822   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
823   CCAssignFn *CalleeAssignFnFixed;
824   CCAssignFn *CalleeAssignFnVarArg;
825   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
826       getAssignFnsForCC(CalleeCC, TLI);
827 
828   CCAssignFn *CallerAssignFnFixed;
829   CCAssignFn *CallerAssignFnVarArg;
830   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
831       getAssignFnsForCC(CallerCC, TLI);
832 
833   AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
834                                               CalleeAssignFnVarArg);
835   AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
836                                               CallerAssignFnVarArg);
837 
838   if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
839     return false;
840 
841   // Make sure that the caller and callee preserve all of the same registers.
842   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
843   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
844   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
845   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
846     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
847     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
848   }
849 
850   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
851 }
852 
853 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
854     CallLoweringInfo &Info, MachineFunction &MF,
855     SmallVectorImpl<ArgInfo> &OrigOutArgs) const {
856   // If there are no outgoing arguments, then we are done.
857   if (OrigOutArgs.empty())
858     return true;
859 
860   const Function &CallerF = MF.getFunction();
861   LLVMContext &Ctx = CallerF.getContext();
862   CallingConv::ID CalleeCC = Info.CallConv;
863   CallingConv::ID CallerCC = CallerF.getCallingConv();
864   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
865   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
866 
867   CCAssignFn *AssignFnFixed;
868   CCAssignFn *AssignFnVarArg;
869   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
870 
871   // We have outgoing arguments. Make sure that we can tail call with them.
872   SmallVector<CCValAssign, 16> OutLocs;
873   CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
874 
875   AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
876                                               Subtarget, /*IsReturn*/ false);
877   // determineAssignments() may modify argument flags, so make a copy.
878   SmallVector<ArgInfo, 8> OutArgs;
879   append_range(OutArgs, OrigOutArgs);
880   if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
881     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
882     return false;
883   }
884 
885   // Make sure that they can fit on the caller's stack.
886   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
887   if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
888     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
889     return false;
890   }
891 
892   // Verify that the parameters in callee-saved registers match.
893   // TODO: Port this over to CallLowering as general code once swiftself is
894   // supported.
895   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
896   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
897   MachineRegisterInfo &MRI = MF.getRegInfo();
898 
899   if (Info.IsVarArg) {
900     // Be conservative and disallow variadic memory operands to match SDAG's
901     // behaviour.
902     // FIXME: If the caller's calling convention is C, then we can
903     // potentially use its argument area. However, for cases like fastcc,
904     // we can't do anything.
905     for (unsigned i = 0; i < OutLocs.size(); ++i) {
906       auto &ArgLoc = OutLocs[i];
907       if (ArgLoc.isRegLoc())
908         continue;
909 
910       LLVM_DEBUG(
911           dbgs()
912           << "... Cannot tail call vararg function with stack arguments\n");
913       return false;
914     }
915   }
916 
917   return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
918 }
919 
920 bool AArch64CallLowering::isEligibleForTailCallOptimization(
921     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
922     SmallVectorImpl<ArgInfo> &InArgs,
923     SmallVectorImpl<ArgInfo> &OutArgs) const {
924 
925   // Must pass all target-independent checks in order to tail call optimize.
926   if (!Info.IsTailCall)
927     return false;
928 
929   CallingConv::ID CalleeCC = Info.CallConv;
930   MachineFunction &MF = MIRBuilder.getMF();
931   const Function &CallerF = MF.getFunction();
932 
933   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
934 
935   if (Info.SwiftErrorVReg) {
936     // TODO: We should handle this.
937     // Note that this is also handled by the check for no outgoing arguments.
938     // Proactively disabling this though, because the swifterror handling in
939     // lowerCall inserts a COPY *after* the location of the call.
940     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
941     return false;
942   }
943 
944   if (!mayTailCallThisCC(CalleeCC)) {
945     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
946     return false;
947   }
948 
949   // Byval parameters hand the function a pointer directly into the stack area
950   // we want to reuse during a tail call. Working around this *is* possible (see
951   // X86).
952   //
953   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
954   // it?
955   //
956   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
957   // In this case, it is necessary to save/restore X0 in the callee. Tail
958   // call opt interferes with this. So we disable tail call opt when the
959   // caller has an argument with "inreg" attribute.
960   //
961   // FIXME: Check whether the callee also has an "inreg" argument.
962   //
963   // When the caller has a swifterror argument, we don't want to tail call
964   // because would have to move into the swifterror register before the
965   // tail call.
966   if (any_of(CallerF.args(), [](const Argument &A) {
967         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
968       })) {
969     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
970                          "inreg, or swifterror arguments\n");
971     return false;
972   }
973 
974   // Externally-defined functions with weak linkage should not be
975   // tail-called on AArch64 when the OS does not support dynamic
976   // pre-emption of symbols, as the AAELF spec requires normal calls
977   // to undefined weak functions to be replaced with a NOP or jump to the
978   // next instruction. The behaviour of branch instructions in this
979   // situation (as used for tail calls) is implementation-defined, so we
980   // cannot rely on the linker replacing the tail call with a return.
981   if (Info.Callee.isGlobal()) {
982     const GlobalValue *GV = Info.Callee.getGlobal();
983     const Triple &TT = MF.getTarget().getTargetTriple();
984     if (GV->hasExternalWeakLinkage() &&
985         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
986          TT.isOSBinFormatMachO())) {
987       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
988                            "with weak linkage for this OS.\n");
989       return false;
990     }
991   }
992 
993   // If we have -tailcallopt, then we're done.
994   if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
995     return CalleeCC == CallerF.getCallingConv();
996 
997   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
998   // Try to find cases where we can do that.
999 
1000   // I want anyone implementing a new calling convention to think long and hard
1001   // about this assert.
1002   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
1003          "Unexpected variadic calling convention");
1004 
1005   // Verify that the incoming and outgoing arguments from the callee are
1006   // safe to tail call.
1007   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1008     LLVM_DEBUG(
1009         dbgs()
1010         << "... Caller and callee have incompatible calling conventions.\n");
1011     return false;
1012   }
1013 
1014   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1015     return false;
1016 
1017   LLVM_DEBUG(
1018       dbgs() << "... Call is eligible for tail call optimization.\n");
1019   return true;
1020 }
1021 
1022 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
1023                               bool IsTailCall,
1024                               std::optional<CallLowering::PtrAuthInfo> &PAI,
1025                               MachineRegisterInfo &MRI) {
1026   const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
1027 
1028   if (!IsTailCall) {
1029     if (!PAI)
1030       return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
1031 
1032     assert(IsIndirect && "Direct call should not be authenticated");
1033     assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
1034            "Invalid auth call key");
1035     return AArch64::BLRA;
1036   }
1037 
1038   if (!IsIndirect)
1039     return AArch64::TCRETURNdi;
1040 
1041   // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
1042   // x17 to hold the function pointer.
1043   if (FuncInfo->branchTargetEnforcement()) {
1044     if (FuncInfo->branchProtectionPAuthLR()) {
1045       assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1046       return AArch64::TCRETURNrix17;
1047     }
1048     if (PAI)
1049       return AArch64::AUTH_TCRETURN_BTI;
1050     return AArch64::TCRETURNrix16x17;
1051   }
1052 
1053   if (FuncInfo->branchProtectionPAuthLR()) {
1054     assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1055     return AArch64::TCRETURNrinotx16;
1056   }
1057 
1058   if (PAI)
1059     return AArch64::AUTH_TCRETURN;
1060   return AArch64::TCRETURNri;
1061 }
1062 
1063 static const uint32_t *
1064 getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
1065                AArch64CallLowering::CallLoweringInfo &Info,
1066                const AArch64RegisterInfo &TRI, MachineFunction &MF) {
1067   const uint32_t *Mask;
1068   if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
1069     // For 'this' returns, use the X0-preserving mask if applicable
1070     Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
1071     if (!Mask) {
1072       OutArgs[0].Flags[0].setReturned(false);
1073       Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1074     }
1075   } else {
1076     Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1077   }
1078   return Mask;
1079 }
1080 
/// Lower the call described by \p Info as a tail call.
///
/// Builds the tail-call instruction chosen by getCallOpcode(), marshals
/// \p OutArgs through an OutgoingArgHandler and finally inserts the
/// instruction at the builder's current position. When the call is not a
/// sibling call (see IsSibCall below), an ADJCALLSTACKDOWN/ADJCALLSTACKUP pair
/// is emitted ahead of the call and the frame delta FPDiff is stored in the
/// call's immediate operand.
///
/// \returns false if argument assignment fails; otherwise true, after setting
/// Info.LoweredTailCall and marking the frame info as having a tail call.
1081 bool AArch64CallLowering::lowerTailCall(
1082     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1083     SmallVectorImpl<ArgInfo> &OutArgs) const {
1084   MachineFunction &MF = MIRBuilder.getMF();
1085   const Function &F = MF.getFunction();
1086   MachineRegisterInfo &MRI = MF.getRegInfo();
1087   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1088   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1089
1090   // True when we're tail calling, but without -tailcallopt.
1091   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
1092                    Info.CallConv != CallingConv::Tail &&
1093                    Info.CallConv != CallingConv::SwiftTail;
1094
1095   // Find out which ABI gets to decide where things go.
1096   CallingConv::ID CalleeCC = Info.CallConv;
1097   CCAssignFn *AssignFnFixed;
1098   CCAssignFn *AssignFnVarArg;
1099   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1100
       // Only non-sibling tail calls get a call sequence; its operands are
       // filled in further down, once the argument handling is done.
1101   MachineInstrBuilder CallSeqStart;
1102   if (!IsSibCall)
1103     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1104
       // The call is built detached from any block so that operands can be
       // added before it is inserted. Operand 0 is the callee.
1105   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, Info.PAI, MRI);
1106   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1107   MIB.add(Info.Callee);
1108
1109   // Subtarget and register info are used for the operands added below.
1110   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1111   auto TRI = Subtarget.getRegisterInfo();
1112
1113   // Byte offset for the tail call. When we are sibcalling, this will always
1114   // be 0.
       // This is operand 1; it is overwritten with FPDiff for non-sibcalls.
1115   MIB.addImm(0);
1116
1117   // Authenticated tail calls always take key/discriminator arguments.
1118   if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
1119     assert((Info.PAI->Key == AArch64PACKey::IA ||
1120             Info.PAI->Key == AArch64PACKey::IB) &&
1121            "Invalid auth call key");
1122     MIB.addImm(Info.PAI->Key);
1123
1124     Register AddrDisc = 0;
1125     uint16_t IntDisc = 0;
1126     std::tie(IntDisc, AddrDisc) =
1127         extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
1128
1129     MIB.addImm(IntDisc);
1130     MIB.addUse(AddrDisc);
         // Operand 4 is the address discriminator that was just added; when it
         // is a real register, constrain it to the class the opcode requires.
1131     if (AddrDisc != AArch64::NoRegister) {
1132       MIB->getOperand(4).setReg(constrainOperandRegClass(
1133           MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1134           *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
1135           MIB->getOperand(4), 4));
1136     }
1137   }
1138
1139   // Tell the call which registers are clobbered.
1140   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1141   if (Subtarget.hasCustomCallingConv())
1142     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1143   MIB.addRegMask(Mask);
1144
1145   if (Info.CFIType)
1146     MIB->setCFIType(MF, Info.CFIType->getZExtValue());
1147
1148   if (TRI->isAnyArgRegReserved(MF))
1149     TRI->emitReservedArgRegCallError(MF);
1150
1151   // FPDiff is the byte offset of the call's argument area from the callee's.
1152   // Stores to callee stack arguments will be placed in FixedStackSlots offset
1153   // by this amount for a tail call. In a sibling call it must be 0 because the
1154   // caller will deallocate the entire stack and the callee still expects its
1155   // arguments to begin at SP+0.
1156   int FPDiff = 0;
1157
1158   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1159   // by -tailcallopt. For sibcalls, the memory operands for the call are
1160   // already available in the caller's incoming argument space.
1161   unsigned NumBytes = 0;
1162   if (!IsSibCall) {
1163     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1164     // before handling assignments, because FPDiff must be known for memory
1165     // arguments.
1166     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1167     SmallVector<CCValAssign, 16> OutLocs;
1168     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1169
1170     AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1171                                                 Subtarget, /*IsReturn*/ false);
1172     if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1173       return false;
1174
1175     // The callee will pop the argument stack as a tail call. Thus, we must
1176     // keep it 16-byte aligned.
1177     NumBytes = alignTo(OutInfo.getStackSize(), 16);
1178
1179     // FPDiff will be negative if this tail call requires more space than we
1180     // would automatically have in our incoming argument space. Positive if we
1181     // actually shrink the stack.
1182     FPDiff = NumReusableBytes - NumBytes;
1183
1184     // Update the required reserved area if this is the tail call requiring the
1185     // most argument stack space.
1186     if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1187       FuncInfo->setTailCallReservedStack(-FPDiff);
1188
1189     // The stack pointer must be 16-byte aligned at all times it's used for a
1190     // memory operation, which in practice means at *all* times and in
1191     // particular across call boundaries. Therefore our own arguments started at
1192     // a 16-byte aligned SP and the delta applied for the tail call should
1193     // satisfy the same constraint.
1194     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1195   }
1196
1197   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1198
1199   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1200                                         Subtarget, /*IsReturn*/ false);
1201
1202   // Do the actual argument marshalling.
1203   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1204                              /*IsTailCall*/ true, FPDiff);
1205   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1206                                      CalleeCC, Info.IsVarArg))
1207     return false;
1208
       // NOTE(review): the regmask operand was already added above and Mask is
       // not read again in this function, so the value stored here is unused.
       // getMaskForArgs() may still clear the 'returned' flag on OutArgs[0].
1209   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1210
1211   if (Info.IsVarArg && Info.IsMustTailCall) {
1212     // Now we know what's being passed to the function. Add uses to the call for
1213     // the forwarded registers that we *aren't* passing as parameters. This will
1214     // preserve the copies we build earlier.
         // Note: the loop variable F shadows the function-level `F` declared
         // at the top of this function.
1215     for (const auto &F : Forwards) {
1216       Register ForwardedReg = F.PReg;
1217       // If the register is already passed, or aliases a register which is
1218       // already being passed, then skip it.
1219       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
1220             if (!Use.isReg())
1221               return false;
1222             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
1223           }))
1224         continue;
1225
1226       // We aren't passing it already, so we should add it to the call.
1227       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
1228       MIB.addReg(ForwardedReg, RegState::Implicit);
1229     }
1230   }
1231
1232   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1233   // sequence start and end here.
1234   if (!IsSibCall) {
         // Operand 1 is the byte-offset immediate added as 0 earlier.
1235     MIB->getOperand(1).setImm(FPDiff);
1236     CallSeqStart.addImm(0).addImm(0);
1237     // End the call sequence *before* emitting the call. Normally, we would
1238     // tidy the frame up after the call. However, here, we've laid out the
1239     // parameters so that when SP is reset, they will be in the correct
1240     // location.
1241     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
1242   }
1243
1244   // Now we can add the actual call instruction to the correct basic block.
1245   MIRBuilder.insertInstr(MIB);
1246
1247   // If Callee is a reg, since it is used by a target specific instruction,
1248   // it must have a register class matching the constraint of that instruction.
1249   if (MIB->getOperand(0).isReg())
1250     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1251                              *MF.getSubtarget().getRegBankInfo(), *MIB,
1252                              MIB->getDesc(), MIB->getOperand(0), 0);
1253
1254   MF.getFrameInfo().setHasTailCall();
1255   Info.LoweredTailCall = true;
1256   return true;
1257 }
1258 
/// Lower the call described by \p Info into machine IR.
///
/// Returns false without emitting a call for Arm64EC subtargets, for the
/// Arm64EC thunk calling conventions, and for musttail calls that are not
/// eligible for tail call optimization. If the call is eligible for tail call
/// optimization it is handed to lowerTailCall(). Otherwise a regular call is
/// emitted after an ADJCALLSTACKDOWN and followed by an ADJCALLSTACKUP, then by
/// the handling of the return value, the swifterror register and any demoted
/// sret loads.
///
/// \returns true on success, false if any of the steps above cannot be
/// handled (including a failed argument or return-value assignment).
1259 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1260                                     CallLoweringInfo &Info) const {
1261   MachineFunction &MF = MIRBuilder.getMF();
1262   const Function &F = MF.getFunction();
1263   MachineRegisterInfo &MRI = MF.getRegInfo();
1264   auto &DL = F.getDataLayout();
1265   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1266   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1267
1268   // Arm64EC has extra requirements for varargs calls; bail out for now.
1269   //
1270   // Arm64EC has special mangling rules for calls; bail out on all calls for
1271   // now.
1272   if (Subtarget.isWindowsArm64EC())
1273     return false;
1274
1275   // Arm64EC thunks have a special calling convention which is only implemented
1276   // in SelectionDAG; bail out for now.
1277   if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
1278       Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
1279     return false;
1280
       // Split every original argument into value-typed pieces.
1281   SmallVector<ArgInfo, 8> OutArgs;
1282   for (auto &OrigArg : Info.OrigArgs) {
1283     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1284     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1285     auto &Flags = OrigArg.Flags[0];
1286     if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
           // The piece just appended for this argument is the last element.
1287       ArgInfo &OutArg = OutArgs.back();
1288       assert(OutArg.Regs.size() == 1 &&
1289              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1290              "Unexpected registers used for i1 arg");
1291
1292       // We cannot use a ZExt ArgInfo flag here, because it will
1293       // zero-extend the argument to i32 instead of just i8.
1294       OutArg.Regs[0] =
1295           MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
1296       LLVMContext &Ctx = MF.getFunction().getContext();
1297       OutArg.Ty = Type::getInt8Ty(Ctx);
1298     }
1299   }
1300
       // Split the return value the same way, unless the call returns void.
1301   SmallVector<ArgInfo, 8> InArgs;
1302   if (!Info.OrigRet.Ty->isVoidTy())
1303     splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1304
1305   // If we can lower as a tail call, do that instead.
1306   bool CanTailCallOpt =
1307       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1308
1309   // We must emit a tail call if we have musttail.
1310   if (Info.IsMustTailCall && !CanTailCallOpt) {
1311     // There are types of incoming/outgoing arguments we can't handle yet, so
1312     // it doesn't make sense to actually die here like in ISelLowering. Instead,
1313     // fall back to SelectionDAG and let it try to handle this.
1314     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1315     return false;
1316   }
1317
1318   Info.IsTailCall = CanTailCallOpt;
1319   if (CanTailCallOpt)
1320     return lowerTailCall(MIRBuilder, Info, OutArgs);
1321
1322   // Find out which ABI gets to decide where things go.
1323   CCAssignFn *AssignFnFixed;
1324   CCAssignFn *AssignFnVarArg;
1325   std::tie(AssignFnFixed, AssignFnVarArg) =
1326       getAssignFnsForCC(Info.CallConv, TLI);
1327
       // The stack-size operands of the call sequence start are added later,
       // once the argument assignment has determined them.
1328   MachineInstrBuilder CallSeqStart;
1329   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1330
1331   // Create a temporarily-floating call instruction so we can add the implicit
1332   // uses of arg registers.
1333
1334   unsigned Opc = 0;
1335   // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1336   // be expanded to the call, directly followed by a special marker sequence and
1337   // a call to an ObjC library function.
1338   if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
1339     Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
1340   // A call to a returns twice function like setjmp must be followed by a bti
1341   // instruction.
1342   else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
1343            !Subtarget.noBTIAtReturnTwice() &&
1344            MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1345     Opc = AArch64::BLR_BTI;
1346   else {
1347     // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1348     // is set.
1349     if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1350       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
1351       DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
1352       MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
           // From here on the callee is the register defined by the
           // G_GLOBAL_VALUE, so the call is treated as indirect below.
1353       Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
1354     }
1355     Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
1356   }
1357
       // CalleeOpNo tracks the operand index of the callee, which moves by one
       // when the RVMARKER opcodes take an extra leading operand.
1358   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1359   unsigned CalleeOpNo = 0;
1360
1361   if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
1362     // Add a target global address for the retainRV/claimRV runtime function
1363     // just before the call target.
1364     Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
1365     MIB.addGlobalAddress(ARCFn);
1366     ++CalleeOpNo;
1367   } else if (Info.CFIType) {
1368     MIB->setCFIType(MF, Info.CFIType->getZExtValue());
1369   }
1370
1371   MIB.add(Info.Callee);
1372
1373   // Tell the call which registers are clobbered.
1374   const uint32_t *Mask;
1375   const auto *TRI = Subtarget.getRegisterInfo();
1376
1377   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1378                                         Subtarget, /*IsReturn*/ false);
1379   // Do the actual argument marshalling.
1380   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
1381   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1382                                      Info.CallConv, Info.IsVarArg))
1383     return false;
1384
       // The mask is chosen after the assignment; getMaskForArgs() may also
       // clear the 'returned' flag on the first argument.
1385   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1386
1387   if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
1388     assert((Info.PAI->Key == AArch64PACKey::IA ||
1389             Info.PAI->Key == AArch64PACKey::IB) &&
1390            "Invalid auth call key");
1391     MIB.addImm(Info.PAI->Key);
1392
1393     Register AddrDisc = 0;
1394     uint16_t IntDisc = 0;
1395     std::tie(IntDisc, AddrDisc) =
1396         extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
1397
1398     MIB.addImm(IntDisc);
1399     MIB.addUse(AddrDisc);
         // The address discriminator sits three operands after the callee
         // (callee, key, integer discriminator, address discriminator).
1400     if (AddrDisc != AArch64::NoRegister) {
1401       constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1402                                *MF.getSubtarget().getRegBankInfo(), *MIB,
1403                                MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
1404                                CalleeOpNo + 3);
1405     }
1406   }
1407
1408   // Tell the call which registers are clobbered.
1409   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1410     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1411   MIB.addRegMask(Mask);
1412
1413   if (TRI->isAnyArgRegReserved(MF))
1414     TRI->emitReservedArgRegCallError(MF);
1415
1416   // Now we can add the actual call instruction to the correct basic block.
1417   MIRBuilder.insertInstr(MIB);
1418
       // When the callee restores the stack, the 16-byte-aligned argument size
       // is passed as the second ADJCALLSTACKUP operand; otherwise it is 0.
1419   uint64_t CalleePopBytes =
1420       doesCalleeRestoreStack(Info.CallConv,
1421                              MF.getTarget().Options.GuaranteedTailCallOpt)
1422           ? alignTo(Assigner.StackSize, 16)
1423           : 0;
1424
1425   CallSeqStart.addImm(Assigner.StackSize).addImm(0);
1426   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1427       .addImm(Assigner.StackSize)
1428       .addImm(CalleePopBytes);
1429
1430   // If Callee is a reg, since it is used by a target specific
1431   // instruction, it must have a register class matching the
1432   // constraint of that instruction.
1433   if (MIB->getOperand(CalleeOpNo).isReg())
1434     constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
1435                              *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
1436                              MIB->getOperand(CalleeOpNo), CalleeOpNo);
1437
1438   // Finally we can copy the returned value back into its virtual-register. In
1439   // symmetry with the arguments, the physical register must be an
1440   // implicit-define of the call instruction.
1441   if (Info.CanLowerReturn  && !Info.OrigRet.Ty->isVoidTy()) {
1442     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
         // Note: Handler and Assigner below shadow the outer locals of the same
         // names that were used for the outgoing arguments.
1443     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1444     bool UsingReturnedArg =
1445         !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1446
1447     AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1448                                           /*IsReturn*/ false);
1449     ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
         // If the first argument is still flagged 'returned', use the
         // returned-argument handler and pass that argument's registers along.
1450     if (!determineAndHandleAssignments(
1451             UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
1452             MIRBuilder, Info.CallConv, Info.IsVarArg,
1453             UsingReturnedArg ? ArrayRef(OutArgs[0].Regs) : std::nullopt))
1454       return false;
1455   }
1456
       // For swifterror calls, X21 becomes an implicit def of the call and is
       // copied into the swifterror virtual register after it.
1457   if (Info.SwiftErrorVReg) {
1458     MIB.addDef(AArch64::X21, RegState::Implicit);
1459     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
1460   }
1461
       // A return value that could not be lowered directly is loaded back
       // through insertSRetLoads() using the demote register / stack index.
1462   if (!Info.CanLowerReturn) {
1463     insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1464                     Info.DemoteRegister, Info.DemoteStackIndex);
1465   }
1466   return true;
1467 }
1468 
1469 bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1470   return Ty.getSizeInBits() == 64;
1471 }
1472