xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64CallLowering.h"
16 #include "AArch64GlobalISelUtils.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64RegisterInfo.h"
20 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64SMEAttributes.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/ObjCARCUtil.h"
25 #include "llvm/CodeGen/Analysis.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FunctionLoweringInfo.h"
28 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
29 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30 #include "llvm/CodeGen/GlobalISel/Utils.h"
31 #include "llvm/CodeGen/LowLevelTypeUtils.h"
32 #include "llvm/CodeGen/MachineBasicBlock.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineFunction.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineOperand.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/TargetOpcodes.h"
40 #include "llvm/CodeGen/TargetRegisterInfo.h"
41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
42 #include "llvm/CodeGen/ValueTypes.h"
43 #include "llvm/CodeGenTypes/MachineValueType.h"
44 #include "llvm/IR/Argument.h"
45 #include "llvm/IR/Attributes.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include <algorithm>
50 #include <cassert>
51 #include <cstdint>
52 
53 #define DEBUG_TYPE "aarch64-call-lowering"
54 
55 using namespace llvm;
56 using namespace AArch64GISelUtils;
57 
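// Declared here, defined elsewhere in the AArch64 backend: when this flag is
// off, functions that use scalable vector (SVE) types are sent back to
// SelectionDAG (see fallBackToDAGISel below).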
58 extern cl::opt<bool> EnableSVEGISel;
59 
60 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
61   : CallLowering(&TLI) {}
62 
63 static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
64                                              MVT &LocVT) {
65   // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
66   // hack because the DAG calls the assignment function with pre-legalized
67   // register typed values, not the raw type.
68   //
69   // This hack is not applied to return values which are not passed on the
70   // stack.
71   if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
72     ValVT = LocVT = MVT::i8;
73   else if (OrigVT == MVT::i16)
74     ValVT = LocVT = MVT::i16;
75 }
76 
77 // Account for i1/i8/i16 stack passed value hack
78 static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
79   const MVT ValVT = VA.getValVT();
80   return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
81                                                  : LLT(VA.getLocVT());
82 }
83 
84 namespace {
85 
86 struct AArch64IncomingValueAssigner
87     : public CallLowering::IncomingValueAssigner {
88   AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
89                                CCAssignFn *AssignFnVarArg_)
90       : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
91 
92   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
93                  CCValAssign::LocInfo LocInfo,
94                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
95                  CCState &State) override {
96     applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
97     return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
98                                             LocInfo, Info, Flags, State);
99   }
100 };
101 
102 struct AArch64OutgoingValueAssigner
103     : public CallLowering::OutgoingValueAssigner {
104   const AArch64Subtarget &Subtarget;
105 
106   /// Track if this is used for a return instead of function argument
107   /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
108   /// stack passed returns for them and cannot apply the type adjustment.
109   bool IsReturn;
110 
111   AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
112                                CCAssignFn *AssignFnVarArg_,
113                                const AArch64Subtarget &Subtarget_,
114                                bool IsReturn)
115       : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
116         Subtarget(Subtarget_), IsReturn(IsReturn) {}
117 
118   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
119                  CCValAssign::LocInfo LocInfo,
120                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
121                  CCState &State) override {
122     const Function &F = State.getMachineFunction().getFunction();
123     bool IsCalleeWin =
124         Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
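    // On Win64 variadic calls even the fixed arguments follow the varargs
    // convention (e.g. floats are passed in integer registers), so route them
    // through AssignFnVarArg below.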
125     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
126 
127     bool Res;
128     if (Info.IsFixed && !UseVarArgsCCForFixed) {
129       if (!IsReturn)
130         applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
131       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
132     } else
133       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
134 
135     StackSize = State.getStackSize();
136     return Res;
137   }
138 };
139 
140 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
141   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
142       : IncomingValueHandler(MIRBuilder, MRI) {}
143 
144   Register getStackAddress(uint64_t Size, int64_t Offset,
145                            MachinePointerInfo &MPO,
146                            ISD::ArgFlagsTy Flags) override {
147     auto &MFI = MIRBuilder.getMF().getFrameInfo();
148 
149     // Byval is assumed to be writable memory, but other stack passed arguments
150     // are not.
151     const bool IsImmutable = !Flags.isByVal();
152 
153     int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
154     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
155     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
156     return AddrReg.getReg(0);
157   }
158 
159   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
160                              ISD::ArgFlagsTy Flags) const override {
161     // For pointers, we just need to fixup the integer types reported in the
162     // CCValAssign.
163     if (Flags.isPointer())
164       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
165     return getStackValueStoreTypeHack(VA);
166   }
167 
168   void assignValueToReg(Register ValVReg, Register PhysReg,
169                         const CCValAssign &VA) override {
170     markRegUsed(PhysReg);
171     IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
172   }
173 
174   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
175                             const MachinePointerInfo &MPO,
176                             const CCValAssign &VA) override {
177     MachineFunction &MF = MIRBuilder.getMF();
178 
179     LLT ValTy(VA.getValVT());
180     LLT LocTy(VA.getLocVT());
181 
182     // Fixup the types for the DAG compatibility hack.
183     if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
184       std::swap(ValTy, LocTy);
185     else {
186       // The calling code knows if this is a pointer or not, we're only touching
187       // the LocTy for the i8/i16 hack.
188       assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
189       LocTy = MemTy;
190     }
191 
192     auto MMO = MF.getMachineMemOperand(
193         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
194         inferAlignFromPtrInfo(MF, MPO));
195 
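    // Use an extending load matching the location info so the bits above the
    // memory type are defined consistently with how the value was extended.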
196     switch (VA.getLocInfo()) {
197     case CCValAssign::LocInfo::ZExt:
198       MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
199       return;
200     case CCValAssign::LocInfo::SExt:
201       MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
202       return;
203     default:
204       MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
205       return;
206     }
207   }
208 
209   /// How the physical register gets marked varies between formal
210   /// parameters (it's a basic-block live-in), and a call instruction
211   /// (it's an implicit-def of the BL).
212   virtual void markRegUsed(Register Reg) = 0;
213 };
214 
215 struct FormalArgHandler : public IncomingArgHandler {
216   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
217       : IncomingArgHandler(MIRBuilder, MRI) {}
218 
219   void markRegUsed(Register Reg) override {
220     MIRBuilder.getMRI()->addLiveIn(Reg.asMCReg());
221     MIRBuilder.getMBB().addLiveIn(Reg.asMCReg());
222   }
223 };
224 
225 struct CallReturnHandler : public IncomingArgHandler {
226   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
227                     MachineInstrBuilder MIB)
228       : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
229 
230   void markRegUsed(Register Reg) override {
231     MIB.addDef(Reg, RegState::Implicit);
232   }
233 
234   MachineInstrBuilder MIB;
235 };
236 
237 /// A special return arg handler for "returned" attribute arg calls.
238 struct ReturnedArgCallReturnHandler : public CallReturnHandler {
239   ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
240                                MachineRegisterInfo &MRI,
241                                MachineInstrBuilder MIB)
242       : CallReturnHandler(MIRBuilder, MRI, MIB) {}
243 
244   void markRegUsed(Register Reg) override {}
245 };
246 
247 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
248   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
249                      MachineInstrBuilder MIB, bool IsTailCall = false,
250                      int FPDiff = 0)
251       : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
252         FPDiff(FPDiff),
253         Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
254 
255   Register getStackAddress(uint64_t Size, int64_t Offset,
256                            MachinePointerInfo &MPO,
257                            ISD::ArgFlagsTy Flags) override {
258     MachineFunction &MF = MIRBuilder.getMF();
259     LLT p0 = LLT::pointer(0, 64);
260     LLT s64 = LLT::scalar(64);
261 
262     if (IsTailCall) {
263       assert(!Flags.isByVal() && "byval unhandled with tail calls");
264 
265       Offset += FPDiff;
266       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
267       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
268       MPO = MachinePointerInfo::getFixedStack(MF, FI);
269       return FIReg.getReg(0);
270     }
271 
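    // Non-tail calls address outgoing stack arguments relative to the current
    // SP.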
272     if (!SPReg)
273       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
274 
275     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
276 
277     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
278 
279     MPO = MachinePointerInfo::getStack(MF, Offset);
280     return AddrReg.getReg(0);
281   }
282 
283   /// We need to fixup the reported store size for certain value types because
284   /// we invert the interpretation of ValVT and LocVT in certain cases. This is
285   /// for compatibility with the DAG call lowering implementation, which we're
286   /// currently building on top of.
287   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
288                              ISD::ArgFlagsTy Flags) const override {
289     if (Flags.isPointer())
290       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
291     return getStackValueStoreTypeHack(VA);
292   }
293 
294   void assignValueToReg(Register ValVReg, Register PhysReg,
295                         const CCValAssign &VA) override {
296     MIB.addUse(PhysReg, RegState::Implicit);
297     Register ExtReg = extendRegister(ValVReg, VA);
298     MIRBuilder.buildCopy(PhysReg, ExtReg);
299   }
300 
301   /// Check whether a stack argument requires lowering in a tail call.
302   static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
303                                           const CCValAssign &VA,
304                                           Register ValVReg,
305                                           Register StoreAddr) {
306     const MachineRegisterInfo &MRI = MF.getRegInfo();
307     // Get the defining instruction for the value.
308     auto *DefMI = MRI.getVRegDef(ValVReg);
309     assert(DefMI && "No defining instruction");
310     for (;;) {
311       // Look through nodes that don't alter the bits of the incoming value.
312       unsigned Op = DefMI->getOpcode();
313       if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT ||
314           Op == TargetOpcode::G_BITCAST || isAssertMI(*DefMI)) {
315         DefMI = MRI.getVRegDef(DefMI->getOperand(1).getReg());
316         continue;
317       }
318       break;
319     }
320 
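    // The store can only be skipped (return false) when the value is a load
    // from the same immutable fixed stack slot, at the same offset and size,
    // that it would be stored back into, i.e. the argument is already in
    // place for the tail call.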
321     auto *Load = dyn_cast<GLoad>(DefMI);
322     if (!Load)
323       return true;
324     Register LoadReg = Load->getPointerReg();
325     auto *LoadAddrDef = MRI.getVRegDef(LoadReg);
326     if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
327       return true;
328     const MachineFrameInfo &MFI = MF.getFrameInfo();
329     int LoadFI = LoadAddrDef->getOperand(1).getIndex();
330 
331     auto *StoreAddrDef = MRI.getVRegDef(StoreAddr);
332     if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
333       return true;
334     int StoreFI = StoreAddrDef->getOperand(1).getIndex();
335 
336     if (!MFI.isImmutableObjectIndex(LoadFI))
337       return true;
338     if (MFI.getObjectOffset(LoadFI) != MFI.getObjectOffset(StoreFI))
339       return true;
340     if (Load->getMemSize() != MFI.getObjectSize(StoreFI))
341       return true;
342 
343     return false;
344   }
345 
346   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
347                             const MachinePointerInfo &MPO,
348                             const CCValAssign &VA) override {
349     MachineFunction &MF = MIRBuilder.getMF();
350     if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, Addr))
351       return;
352     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
353                                        inferAlignFromPtrInfo(MF, MPO));
354     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
355   }
356 
357   void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
358                             Register Addr, LLT MemTy,
359                             const MachinePointerInfo &MPO,
360                             const CCValAssign &VA) override {
361     unsigned MaxSize = MemTy.getSizeInBytes() * 8;
362     // For varargs, we always want to extend them to 8 bytes, in which case
363     // we disable setting a max.
364     if (!Arg.IsFixed)
365       MaxSize = 0;
366 
367     Register ValVReg = Arg.Regs[RegIndex];
368     if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
369       MVT LocVT = VA.getLocVT();
370       MVT ValVT = VA.getValVT();
371 
372       if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
373         std::swap(ValVT, LocVT);
374         MemTy = LLT(VA.getValVT());
375       }
376 
377       ValVReg = extendRegister(ValVReg, VA, MaxSize);
378     } else {
379       // The store does not cover the full allocated stack slot.
380       MemTy = LLT(VA.getValVT());
381     }
382 
383     assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
384   }
385 
386   MachineInstrBuilder MIB;
387 
388   bool IsTailCall;
389 
390   /// For tail calls, the byte offset of the call's argument area from the
391   /// callee's. Unused elsewhere.
392   int FPDiff;
393 
394   // Cache the SP register vreg if we need it more than once in this call site.
395   Register SPReg;
396 
397   const AArch64Subtarget &Subtarget;
398 };
399 } // namespace
400 
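/// Return true if the callee is expected to pop its own stack arguments, which
/// happens for guaranteed-TCO conventions (fastcc with -tailcallopt, tailcc,
/// swifttailcc).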
401 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
402   return (CallConv == CallingConv::Fast && TailCallOpt) ||
403          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
404 }
405 
406 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
407                                       const Value *Val,
408                                       ArrayRef<Register> VRegs,
409                                       FunctionLoweringInfo &FLI,
410                                       Register SwiftErrorVReg) const {
411   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
412   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
413          "Return value without a vreg");
414 
415   bool Success = true;
416   if (!FLI.CanLowerReturn) {
417     insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
418   } else if (!VRegs.empty()) {
419     MachineFunction &MF = MIRBuilder.getMF();
420     const Function &F = MF.getFunction();
421     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
422 
423     MachineRegisterInfo &MRI = MF.getRegInfo();
424     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
425     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
426     auto &DL = F.getDataLayout();
427     LLVMContext &Ctx = Val->getType()->getContext();
428 
429     SmallVector<EVT, 4> SplitEVTs;
430     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
431     assert(VRegs.size() == SplitEVTs.size() &&
432            "For each split Type there should be exactly one VReg.");
433 
434     SmallVector<ArgInfo, 8> SplitArgs;
435     CallingConv::ID CC = F.getCallingConv();
436 
437     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
438       Register CurVReg = VRegs[i];
439       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
440       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
441 
442       // i1 is a special case because SDAG i1 true is naturally zero extended
443       // when widened using ANYEXT. We need to do it explicitly here.
444       auto &Flags = CurArgInfo.Flags[0];
445       if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
446           !Flags.isSExt() && !Flags.isZExt()) {
447         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
448       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
449                  1) {
450         // Some types will need extending as specified by the CC.
451         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
452         if (EVT(NewVT) != SplitEVTs[i]) {
453           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
454           if (F.getAttributes().hasRetAttr(Attribute::SExt))
455             ExtendOp = TargetOpcode::G_SEXT;
456           else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
457             ExtendOp = TargetOpcode::G_ZEXT;
458 
459           LLT NewLLT(NewVT);
460           LLT OldLLT = getLLTForType(*CurArgInfo.Ty, DL);
461           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
462           // Instead of an extend, we might have a vector type which needs
463           // padding with more elements, e.g. <2 x half> -> <4 x half>.
464           if (NewVT.isVector()) {
465             if (OldLLT.isVector()) {
466               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
467                 CurVReg =
468                     MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
469                         .getReg(0);
470               } else {
471                 // Just do a vector extend.
472                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
473                               .getReg(0);
474               }
475             } else if (NewLLT.getNumElements() >= 2 &&
476                        NewLLT.getNumElements() <= 8) {
477               // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
478               // have <1 x S> vector types in GISel we use a build_vector
479               // instead of a vector merge/concat.
480               CurVReg =
481                   MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
482                       .getReg(0);
483             } else {
484               LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
485               return false;
486             }
487           } else {
488             // If the split EVT was a <1 x T> vector, and NewVT is T, then we
489             // don't have to do anything since we don't distinguish between the
490             // two.
491             if (NewLLT != MRI.getType(CurVReg)) {
492               // A scalar extend.
493               CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
494                             .getReg(0);
495             }
496           }
497         }
498       }
499       if (CurVReg != CurArgInfo.Regs[0]) {
500         CurArgInfo.Regs[0] = CurVReg;
501         // Reset the arg flags after modifying CurVReg.
502         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
503       }
504       splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
505     }
506 
507     AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
508                                           /*IsReturn*/ true);
509     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
510     Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
511                                             MIRBuilder, CC, F.isVarArg());
512   }
513 
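  // The swifterror result is returned in X21 in addition to the normal return
  // registers.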
514   if (SwiftErrorVReg) {
515     MIB.addUse(AArch64::X21, RegState::Implicit);
516     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
517   }
518 
519   MIRBuilder.insertInstr(MIB);
520   return Success;
521 }
522 
523 bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
524                                          CallingConv::ID CallConv,
525                                          SmallVectorImpl<BaseArgInfo> &Outs,
526                                          bool IsVarArg) const {
527   SmallVector<CCValAssign, 16> ArgLocs;
528   const auto &TLI = *getTLI<AArch64TargetLowering>();
529   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
530                  MF.getFunction().getContext());
531 
532   return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
533 }
534 
535 /// Helper function to compute forwarded registers for musttail calls. Computes
536 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
537 /// can be used to save + restore registers later.
538 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
539                                              CCAssignFn *AssignFn) {
540   MachineBasicBlock &MBB = MIRBuilder.getMBB();
541   MachineFunction &MF = MIRBuilder.getMF();
542   MachineFrameInfo &MFI = MF.getFrameInfo();
543 
544   if (!MFI.hasMustTailInVarArgFunc())
545     return;
546 
547   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
548   const Function &F = MF.getFunction();
549   assert(F.isVarArg() && "Expected F to be vararg?");
550 
551   // Compute the set of forwarded registers. The rest are scratch.
552   SmallVector<CCValAssign, 16> ArgLocs;
553   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
554                  F.getContext());
555   SmallVector<MVT, 2> RegParmTypes;
556   RegParmTypes.push_back(MVT::i64);
557   RegParmTypes.push_back(MVT::f128);
558 
559   // Later on, we can use this vector to restore the registers if necessary.
560   SmallVectorImpl<ForwardedRegister> &Forwards =
561       FuncInfo->getForwardedMustTailRegParms();
562   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
563 
564   // Conservatively forward X8, since it might be used for an aggregate
565   // return.
566   if (!CCInfo.isAllocated(AArch64::X8)) {
567     Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
568     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
569   }
570 
571   // Add the forwards to the MachineBasicBlock and MachineFunction.
572   for (const auto &F : Forwards) {
573     MBB.addLiveIn(F.PReg);
574     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
575   }
576 }
577 
578 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
579   auto &F = MF.getFunction();
580   if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
581                           llvm::any_of(F.args(), [](const Argument &A) {
582                             return A.getType()->isScalableTy();
583                           })))
584     return true;
585   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
586   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
587     LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
588     return true;
589   }
590 
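  // Fall back for SME functions: ZA/ZT0 state and streaming (or
  // streaming-compatible) interfaces require lowering support that GlobalISel
  // does not implement yet.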
591   SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
592   if (Attrs.hasZAState() || Attrs.hasZT0State() ||
593       Attrs.hasStreamingInterfaceOrBody() ||
594       Attrs.hasStreamingCompatibleInterface())
595     return true;
596 
597   return false;
598 }
599 
600 void AArch64CallLowering::saveVarArgRegisters(
601     MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
602     CCState &CCInfo) const {
603   auto GPRArgRegs = AArch64::getGPRArgRegs();
604   auto FPRArgRegs = AArch64::getFPRArgRegs();
605 
606   MachineFunction &MF = MIRBuilder.getMF();
607   MachineRegisterInfo &MRI = MF.getRegInfo();
608   MachineFrameInfo &MFI = MF.getFrameInfo();
609   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
610   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
611   bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(),
612                                                 MF.getFunction().isVarArg());
613   const LLT p0 = LLT::pointer(0, 64);
614   const LLT s64 = LLT::scalar(64);
615 
616   unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
617   unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
618 
619   unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
620   int GPRIdx = 0;
621   if (GPRSaveSize != 0) {
622     if (IsWin64CC) {
623       GPRIdx = MFI.CreateFixedObject(GPRSaveSize,
624                                      -static_cast<int>(GPRSaveSize), false);
625       if (GPRSaveSize & 15)
626         // The extra size here, if triggered, will always be 8.
627         MFI.CreateFixedObject(16 - (GPRSaveSize & 15),
628                               -static_cast<int>(alignTo(GPRSaveSize, 16)),
629                               false);
630     } else
631       GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
632 
633     auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx);
634     auto Offset =
635         MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8);
636 
637     for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
638       Register Val = MRI.createGenericVirtualRegister(s64);
639       Handler.assignValueToReg(
640           Val, GPRArgRegs[i],
641           CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64,
642                               GPRArgRegs[i], MVT::i64, CCValAssign::Full));
643       auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
644                                MF, GPRIdx, (i - FirstVariadicGPR) * 8)
645                          : MachinePointerInfo::getStack(MF, i * 8);
646       MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
647 
648       FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
649                                    FIN.getReg(0), Offset);
650     }
651   }
652   FuncInfo->setVarArgsGPRIndex(GPRIdx);
653   FuncInfo->setVarArgsGPRSize(GPRSaveSize);
654 
655   if (Subtarget.hasFPARMv8() && !IsWin64CC) {
656     unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
657 
658     unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
659     int FPRIdx = 0;
660     if (FPRSaveSize != 0) {
661       FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
662 
663       auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx);
664       auto Offset =
665           MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16);
666 
667       for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
668         Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128));
669         Handler.assignValueToReg(
670             Val, FPRArgRegs[i],
671             CCValAssign::getReg(
672                 i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
673                 MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full));
674 
675         auto MPO = MachinePointerInfo::getStack(MF, i * 16);
676         MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
677 
678         FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
679                                      FIN.getReg(0), Offset);
680       }
681     }
682     FuncInfo->setVarArgsFPRIndex(FPRIdx);
683     FuncInfo->setVarArgsFPRSize(FPRSaveSize);
684   }
685 }
686 
687 bool AArch64CallLowering::lowerFormalArguments(
688     MachineIRBuilder &MIRBuilder, const Function &F,
689     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
690   MachineFunction &MF = MIRBuilder.getMF();
691   MachineBasicBlock &MBB = MIRBuilder.getMBB();
692   MachineRegisterInfo &MRI = MF.getRegInfo();
693   auto &DL = F.getDataLayout();
694   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
695 
696   // Arm64EC has extra requirements for varargs calls which are only implemented
697   // in SelectionDAG; bail out for now.
698   if (F.isVarArg() && Subtarget.isWindowsArm64EC())
699     return false;
700 
701   // Arm64EC thunks have a special calling convention which is only implemented
702   // in SelectionDAG; bail out for now.
703   if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native ||
704       F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
705     return false;
706 
707   bool IsWin64 =
708       Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) &&
709       !Subtarget.isWindowsArm64EC();
710 
711   SmallVector<ArgInfo, 8> SplitArgs;
712   SmallVector<std::pair<Register, Register>> BoolArgs;
713 
714   // Insert the hidden sret parameter if the return value won't fit in the
715   // return registers.
716   if (!FLI.CanLowerReturn)
717     insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
718 
719   unsigned i = 0;
720   for (auto &Arg : F.args()) {
721     if (DL.getTypeStoreSize(Arg.getType()).isZero())
722       continue;
723 
724     ArgInfo OrigArg{VRegs[i], Arg, i};
725     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
726 
727     // i1 arguments are zero-extended to i8 by the caller. Emit a
728     // hint to reflect this.
729     if (OrigArg.Ty->isIntegerTy(1)) {
730       assert(OrigArg.Regs.size() == 1 &&
731              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
732              "Unexpected registers used for i1 arg");
733 
734       auto &Flags = OrigArg.Flags[0];
735       if (!Flags.isZExt() && !Flags.isSExt()) {
736         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
737         Register OrigReg = OrigArg.Regs[0];
738         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
739         OrigArg.Regs[0] = WideReg;
740         BoolArgs.push_back({OrigReg, WideReg});
741       }
742     }
743 
744     if (Arg.hasAttribute(Attribute::SwiftAsync))
745       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
746 
747     splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
748     ++i;
749   }
750 
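  // Emit the argument lowering at the top of the entry block so it precedes
  // any instructions already placed there.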
751   if (!MBB.empty())
752     MIRBuilder.setInstr(*MBB.begin());
753 
754   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
755   CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
756 
757   AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
758   FormalArgHandler Handler(MIRBuilder, MRI);
759   SmallVector<CCValAssign, 16> ArgLocs;
760   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
761   if (!determineAssignments(Assigner, SplitArgs, CCInfo) ||
762       !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder))
763     return false;
764 
765   if (!BoolArgs.empty()) {
766     for (auto &KV : BoolArgs) {
767       Register OrigReg = KV.first;
768       Register WideReg = KV.second;
769       LLT WideTy = MRI.getType(WideReg);
770       assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
771              "Unexpected bit size of a bool arg");
772       MIRBuilder.buildTrunc(
773           OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
774     }
775   }
776 
777   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
778   uint64_t StackSize = Assigner.StackSize;
779   if (F.isVarArg()) {
780     if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
781       // The AAPCS variadic function ABI is identical to the non-variadic
782       // one. As a result there may be more arguments in registers and we should
783       // save them for future reference.
784       // Win64 variadic functions also pass arguments in registers, but all
785       // float arguments are passed in integer registers.
786       saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
787     } else if (Subtarget.isWindowsArm64EC()) {
788       return false;
789     }
790 
791     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
792     StackSize = alignTo(Assigner.StackSize, Subtarget.isTargetILP32() ? 4 : 8);
793 
794     auto &MFI = MIRBuilder.getMF().getFrameInfo();
795     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackSize, true));
796   }
797 
798   if (doesCalleeRestoreStack(F.getCallingConv(),
799                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
800     // We have a non-standard ABI, so why not make full use of the stack that
801     // we're going to pop? It must be aligned to 16 B in any case.
802     StackSize = alignTo(StackSize, 16);
803 
804     // If we're expected to restore the stack (e.g. fastcc), then we'll be
805     // adding a multiple of 16.
806     FuncInfo->setArgumentStackToRestore(StackSize);
807 
808     // Our own callers will guarantee that the space is free by giving an
809     // aligned value to CALLSEQ_START.
810   }
811 
812   // When we tail call, we need to check if the callee's arguments
813   // will fit on the caller's stack. So, whenever we lower formal arguments,
814   // we should keep track of this information, since we might lower a tail call
815   // in this function later.
816   FuncInfo->setBytesInStackArgArea(StackSize);
817 
818   if (Subtarget.hasCustomCallingConv())
819     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
820 
821   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
822 
823   // Move back to the end of the basic block.
824   MIRBuilder.setMBB(MBB);
825 
826   return true;
827 }
828 
829 /// Return true if the calling convention is one that we can guarantee TCO for.
830 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
831   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
832          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
833 }
834 
835 /// Return true if we might ever do TCO for calls with this calling convention.
836 static bool mayTailCallThisCC(CallingConv::ID CC) {
837   switch (CC) {
838   case CallingConv::C:
839   case CallingConv::PreserveMost:
840   case CallingConv::PreserveAll:
841   case CallingConv::PreserveNone:
842   case CallingConv::Swift:
843   case CallingConv::SwiftTail:
844   case CallingConv::Tail:
845   case CallingConv::Fast:
846     return true;
847   default:
848     return false;
849   }
850 }
851 
852 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
853 /// CC.
854 static std::pair<CCAssignFn *, CCAssignFn *>
855 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
856   return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
857 }
858 
859 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
860     CallLoweringInfo &Info, MachineFunction &MF,
861     SmallVectorImpl<ArgInfo> &InArgs) const {
862   const Function &CallerF = MF.getFunction();
863   CallingConv::ID CalleeCC = Info.CallConv;
864   CallingConv::ID CallerCC = CallerF.getCallingConv();
865 
866   // If the calling conventions match, then everything must be the same.
867   if (CalleeCC == CallerCC)
868     return true;
869 
870   // Check if the caller and callee will handle arguments in the same way.
871   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
872   CCAssignFn *CalleeAssignFnFixed;
873   CCAssignFn *CalleeAssignFnVarArg;
874   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
875       getAssignFnsForCC(CalleeCC, TLI);
876 
877   CCAssignFn *CallerAssignFnFixed;
878   CCAssignFn *CallerAssignFnVarArg;
879   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
880       getAssignFnsForCC(CallerCC, TLI);
881 
882   AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
883                                               CalleeAssignFnVarArg);
884   AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
885                                               CallerAssignFnVarArg);
886 
887   if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
888     return false;
889 
890   // Make sure that the caller and callee preserve all of the same registers.
891   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
892   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
893   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
894   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
895     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
896     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
897   }
898 
899   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
900 }
901 
902 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
903     CallLoweringInfo &Info, MachineFunction &MF,
904     SmallVectorImpl<ArgInfo> &OrigOutArgs) const {
905   // If there are no outgoing arguments, then we are done.
906   if (OrigOutArgs.empty())
907     return true;
908 
909   const Function &CallerF = MF.getFunction();
910   LLVMContext &Ctx = CallerF.getContext();
911   CallingConv::ID CalleeCC = Info.CallConv;
912   CallingConv::ID CallerCC = CallerF.getCallingConv();
913   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
914   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
915 
916   CCAssignFn *AssignFnFixed;
917   CCAssignFn *AssignFnVarArg;
918   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
919 
920   // We have outgoing arguments. Make sure that we can tail call with them.
921   SmallVector<CCValAssign, 16> OutLocs;
922   CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
923 
924   AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
925                                               Subtarget, /*IsReturn*/ false);
926   // determineAssignments() may modify argument flags, so make a copy.
927   SmallVector<ArgInfo, 8> OutArgs;
928   append_range(OutArgs, OrigOutArgs);
929   if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
930     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
931     return false;
932   }
933 
934   // Make sure that they can fit on the caller's stack.
935   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
936   if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
937     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
938     return false;
939   }
940 
941   // Verify that the parameters in callee-saved registers match.
942   // TODO: Port this over to CallLowering as general code once swiftself is
943   // supported.
944   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
945   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
946   MachineRegisterInfo &MRI = MF.getRegInfo();
947 
948   if (Info.IsVarArg) {
949     // Be conservative and disallow variadic memory operands to match SDAG's
950     // behaviour.
951     // FIXME: If the caller's calling convention is C, then we can
952     // potentially use its argument area. However, for cases like fastcc,
953     // we can't do anything.
954     for (unsigned i = 0; i < OutLocs.size(); ++i) {
955       auto &ArgLoc = OutLocs[i];
956       if (ArgLoc.isRegLoc())
957         continue;
958 
959       LLVM_DEBUG(
960           dbgs()
961           << "... Cannot tail call vararg function with stack arguments\n");
962       return false;
963     }
964   }
965 
966   return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
967 }
968 
969 bool AArch64CallLowering::isEligibleForTailCallOptimization(
970     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
971     SmallVectorImpl<ArgInfo> &InArgs,
972     SmallVectorImpl<ArgInfo> &OutArgs) const {
973 
974   // Must pass all target-independent checks in order to tail call optimize.
975   if (!Info.IsTailCall)
976     return false;
977 
978   CallingConv::ID CalleeCC = Info.CallConv;
979   MachineFunction &MF = MIRBuilder.getMF();
980   const Function &CallerF = MF.getFunction();
981 
982   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
983 
984   if (Info.SwiftErrorVReg) {
985     // TODO: We should handle this.
986     // Note that this is also handled by the check for no outgoing arguments.
987     // Proactively disabling this though, because the swifterror handling in
988     // lowerCall inserts a COPY *after* the location of the call.
989     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
990     return false;
991   }
992 
993   if (!mayTailCallThisCC(CalleeCC)) {
994     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
995     return false;
996   }
997 
998   // Byval parameters hand the function a pointer directly into the stack area
999   // we want to reuse during a tail call. Working around this *is* possible (see
1000   // X86).
1001   //
1002   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
1003   // it?
1004   //
1005   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
1006   // In this case, it is necessary to save/restore X0 in the callee. Tail
1007   // call opt interferes with this. So we disable tail call opt when the
1008   // caller has an argument with "inreg" attribute.
1009   //
1010   // FIXME: Check whether the callee also has an "inreg" argument.
1011   //
1012   // When the caller has a swifterror argument, we don't want to tail call
1013   // because we would have to move into the swifterror register before the
1014   // tail call.
1015   if (any_of(CallerF.args(), [](const Argument &A) {
1016         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
1017       })) {
1018     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
1019                          "inreg, or swifterror arguments\n");
1020     return false;
1021   }
1022 
1023   // Externally-defined functions with weak linkage should not be
1024   // tail-called on AArch64 when the OS does not support dynamic
1025   // pre-emption of symbols, as the AAELF spec requires normal calls
1026   // to undefined weak functions to be replaced with a NOP or jump to the
1027   // next instruction. The behaviour of branch instructions in this
1028   // situation (as used for tail calls) is implementation-defined, so we
1029   // cannot rely on the linker replacing the tail call with a return.
1030   if (Info.Callee.isGlobal()) {
1031     const GlobalValue *GV = Info.Callee.getGlobal();
1032     const Triple &TT = MF.getTarget().getTargetTriple();
1033     if (GV->hasExternalWeakLinkage() &&
1034         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
1035          TT.isOSBinFormatMachO())) {
1036       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
1037                            "with weak linkage for this OS.\n");
1038       return false;
1039     }
1040   }
1041 
1042   // If we have -tailcallopt, then we're done.
1043   if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
1044     return CalleeCC == CallerF.getCallingConv();
1045 
1046   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
1047   // Try to find cases where we can do that.
1048 
1049   // I want anyone implementing a new calling convention to think long and hard
1050   // about this assert.
1051   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
1052          "Unexpected variadic calling convention");
1053 
1054   // Verify that the incoming and outgoing arguments from the callee are
1055   // safe to tail call.
1056   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1057     LLVM_DEBUG(
1058         dbgs()
1059         << "... Caller and callee have incompatible calling conventions.\n");
1060     return false;
1061   }
1062 
1063   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1064     return false;
1065 
1066   LLVM_DEBUG(
1067       dbgs() << "... Call is eligible for tail call optimization.\n");
1068   return true;
1069 }
1070 
1071 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
1072                               bool IsTailCall,
1073                               std::optional<CallLowering::PtrAuthInfo> &PAI,
1074                               MachineRegisterInfo &MRI) {
1075   const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
1076 
1077   if (!IsTailCall) {
1078     if (!PAI)
1079       return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
1080 
1081     assert(IsIndirect && "Direct call should not be authenticated");
1082     assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
1083            "Invalid auth call key");
1084     return AArch64::BLRA;
1085   }
1086 
1087   if (!IsIndirect)
1088     return AArch64::TCRETURNdi;
1089 
1090   // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
1091   // x17 to hold the function pointer.
1092   if (FuncInfo->branchTargetEnforcement()) {
1093     if (FuncInfo->branchProtectionPAuthLR()) {
1094       assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1095       return AArch64::TCRETURNrix17;
1096     }
1097     if (PAI)
1098       return AArch64::AUTH_TCRETURN_BTI;
1099     return AArch64::TCRETURNrix16x17;
1100   }
1101 
1102   if (FuncInfo->branchProtectionPAuthLR()) {
1103     assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1104     return AArch64::TCRETURNrinotx16;
1105   }
1106 
1107   if (PAI)
1108     return AArch64::AUTH_TCRETURN;
1109   return AArch64::TCRETURNri;
1110 }
1111 
1112 static const uint32_t *
1113 getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
1114                AArch64CallLowering::CallLoweringInfo &Info,
1115                const AArch64RegisterInfo &TRI, MachineFunction &MF) {
1116   const uint32_t *Mask;
1117   if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
1118     // For 'this' returns, use the X0-preserving mask if applicable
1119     Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
1120     if (!Mask) {
1121       OutArgs[0].Flags[0].setReturned(false);
1122       Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1123     }
1124   } else {
1125     Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1126   }
1127   return Mask;
1128 }
1129 
1130 bool AArch64CallLowering::lowerTailCall(
1131     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1132     SmallVectorImpl<ArgInfo> &OutArgs) const {
1133   MachineFunction &MF = MIRBuilder.getMF();
1134   const Function &F = MF.getFunction();
1135   MachineRegisterInfo &MRI = MF.getRegInfo();
1136   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1137   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1138 
1139   // True when we're tail calling, but without -tailcallopt.
1140   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
1141                    Info.CallConv != CallingConv::Tail &&
1142                    Info.CallConv != CallingConv::SwiftTail;
1143 
1144   // Find out which ABI gets to decide where things go.
1145   CallingConv::ID CalleeCC = Info.CallConv;
1146   CCAssignFn *AssignFnFixed;
1147   CCAssignFn *AssignFnVarArg;
1148   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1149 
1150   MachineInstrBuilder CallSeqStart;
1151   if (!IsSibCall)
1152     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1153 
1154   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, Info.PAI, MRI);
1155   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1156   MIB.add(Info.Callee);
1157 
1158   // Tell the call which registers are clobbered.
1159   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1160   auto TRI = Subtarget.getRegisterInfo();
1161 
1162   // Byte offset for the tail call. When we are sibcalling, this will always
1163   // be 0.
1164   MIB.addImm(0);
1165 
1166   // Authenticated tail calls always take key/discriminator arguments.
1167   if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
1168     assert((Info.PAI->Key == AArch64PACKey::IA ||
1169             Info.PAI->Key == AArch64PACKey::IB) &&
1170            "Invalid auth call key");
1171     MIB.addImm(Info.PAI->Key);
1172 
1173     Register AddrDisc = 0;
1174     uint16_t IntDisc = 0;
1175     std::tie(IntDisc, AddrDisc) =
1176         extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
1177 
1178     MIB.addImm(IntDisc);
1179     MIB.addUse(AddrDisc);
1180     if (AddrDisc != AArch64::NoRegister) {
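      // Operand 4 is the address discriminator; constrain it to the register
      // class the AUTH_TCRETURN* pseudo expects.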
1181       MIB->getOperand(4).setReg(constrainOperandRegClass(
1182           MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1183           *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
1184           MIB->getOperand(4), 4));
1185     }
1186   }
1187 
1188   // Tell the call which registers are clobbered.
1189   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1190   if (Subtarget.hasCustomCallingConv())
1191     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1192   MIB.addRegMask(Mask);
1193 
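  // Propagate the expected CFI type id (from a kcfi operand bundle) onto the
  // call instruction.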
1194   if (Info.CFIType)
1195     MIB->setCFIType(MF, Info.CFIType->getZExtValue());
1196 
1197   if (TRI->isAnyArgRegReserved(MF))
1198     TRI->emitReservedArgRegCallError(MF);
1199 
1200   // FPDiff is the byte offset of the call's argument area from the callee's.
1201   // Stores to callee stack arguments will be placed in FixedStackSlots offset
1202   // by this amount for a tail call. In a sibling call it must be 0 because the
1203   // caller will deallocate the entire stack and the callee still expects its
1204   // arguments to begin at SP+0.
1205   int FPDiff = 0;
1206 
1207   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1208   // by -tailcallopt. For sibcalls, the memory operands for the call are
1209   // already available in the caller's incoming argument space.
1210   unsigned NumBytes = 0;
1211   if (!IsSibCall) {
1212     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1213     // before handling assignments, because FPDiff must be known for memory
1214     // arguments.
1215     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1216     SmallVector<CCValAssign, 16> OutLocs;
1217     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1218 
1219     AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1220                                                 Subtarget, /*IsReturn*/ false);
1221     if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1222       return false;
1223 
1224     // The callee will pop the argument stack as a tail call. Thus, we must
1225     // keep it 16-byte aligned.
1226     NumBytes = alignTo(OutInfo.getStackSize(), 16);
1227 
1228     // FPDiff will be negative if this tail call requires more space than we
1229     // would automatically have in our incoming argument space. Positive if we
1230     // actually shrink the stack.
1231     FPDiff = NumReusableBytes - NumBytes;
1232 
1233     // Update the required reserved area if this is the tail call requiring the
1234     // most argument stack space.
1235     if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1236       FuncInfo->setTailCallReservedStack(-FPDiff);
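         // Illustrative example: with 16 bytes of incoming stack argument space
         // and a tail call needing 48 bytes of outgoing arguments, FPDiff is
         // 16 - 48 = -32 and the reserved tail-call area grows to 32 bytes.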
1237 
1238     // The stack pointer must be 16-byte aligned at all times it's used for a
1239     // memory operation, which in practice means at *all* times and in
1240     // particular across call boundaries. Therefore our own arguments started at
1241     // a 16-byte aligned SP and the delta applied for the tail call should
1242     // satisfy the same constraint.
1243     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1244   }
1245 
1246   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1247 
1248   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1249                                         Subtarget, /*IsReturn*/ false);
1250 
1251   // Do the actual argument marshalling.
1252   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1253                              /*IsTailCall*/ true, FPDiff);
1254   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1255                                      CalleeCC, Info.IsVarArg))
1256     return false;
1257 
1258   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1259 
1260   if (Info.IsVarArg && Info.IsMustTailCall) {
1261     // Now we know what's being passed to the function. Add uses to the call for
1262     // the forwarded registers that we *aren't* passing as parameters. This will
1263     // preserve the copies we built earlier.
1264     for (const auto &F : Forwards) {
1265       Register ForwardedReg = F.PReg;
1266       // If the register is already passed, or aliases a register which is
1267       // already being passed, then skip it.
1268       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
1269             if (!Use.isReg())
1270               return false;
1271             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
1272           }))
1273         continue;
1274 
1275       // We aren't passing it already, so we should add it to the call.
1276       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
1277       MIB.addReg(ForwardedReg, RegState::Implicit);
1278     }
1279   }
1280 
1281   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1282   // sequence start and end here.
1283   if (!IsSibCall) {
1284     MIB->getOperand(1).setImm(FPDiff);
1285     CallSeqStart.addImm(0).addImm(0);
1286     // End the call sequence *before* emitting the call. Normally, we would
1287     // tidy the frame up after the call. However, here, we've laid out the
1288     // parameters so that when SP is reset, they will be in the correct
1289     // location.
1290     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
1291   }
1292 
1293   // Now we can add the actual call instruction to the correct basic block.
1294   MIRBuilder.insertInstr(MIB);
1295 
1296   // If Callee is a reg, since it is used by a target-specific instruction,
1297   // it must have a register class matching the constraint of that instruction.
1298   if (MIB->getOperand(0).isReg())
1299     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1300                              *MF.getSubtarget().getRegBankInfo(), *MIB,
1301                              MIB->getDesc(), MIB->getOperand(0), 0);
1302 
1303   MF.getFrameInfo().setHasTailCall();
1304   Info.LoweredTailCall = true;
1305   return true;
1306 }
1307 
1308 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1309                                     CallLoweringInfo &Info) const {
1310   MachineFunction &MF = MIRBuilder.getMF();
1311   const Function &F = MF.getFunction();
1312   MachineRegisterInfo &MRI = MF.getRegInfo();
1313   auto &DL = F.getDataLayout();
1314   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1315   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1316 
1317   // Arm64EC has extra requirements for varargs calls and special mangling
1318   // rules for calls in general, none of which is implemented here yet; bail
1319   // out on all calls for now.
1321   if (Subtarget.isWindowsArm64EC())
1322     return false;
1323 
1324   // Arm64EC thunks have a special calling convention which is only implemented
1325   // in SelectionDAG; bail out for now.
1326   if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
1327       Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
1328     return false;
1329 
1330   SmallVector<ArgInfo, 8> OutArgs;
1331   for (auto &OrigArg : Info.OrigArgs) {
1332     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1333     // AAPCS requires the caller to zero-extend i1 arguments to 8 bits.
1334     auto &Flags = OrigArg.Flags[0];
1335     if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
1336       ArgInfo &OutArg = OutArgs.back();
1337       assert(OutArg.Regs.size() == 1 &&
1338              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1339              "Unexpected registers used for i1 arg");
1340 
1341       // We cannot use a ZExt ArgInfo flag here, because it will
1342       // zero-extend the argument to i32 instead of just i8.
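           // For example, an incoming 'i1 true' is passed to the callee as the
           // 8-bit value 1 with bits 1..7 cleared, as AAPCS requires of the
           // caller.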
1343       OutArg.Regs[0] =
1344           MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
1345       LLVMContext &Ctx = MF.getFunction().getContext();
1346       OutArg.Ty = Type::getInt8Ty(Ctx);
1347     }
1348   }
1349 
1350   SmallVector<ArgInfo, 8> InArgs;
1351   if (!Info.OrigRet.Ty->isVoidTy())
1352     splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1353 
1354   // If we can lower as a tail call, do that instead.
1355   bool CanTailCallOpt =
1356       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1357 
1358   // We must emit a tail call if we have musttail.
1359   if (Info.IsMustTailCall && !CanTailCallOpt) {
1360     // There are types of incoming/outgoing arguments we can't handle yet, so
1361     // it doesn't make sense to actually die here like in ISelLowering. Instead,
1362     // fall back to SelectionDAG and let it try to handle this.
1363     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1364     return false;
1365   }
1366 
1367   Info.IsTailCall = CanTailCallOpt;
1368   if (CanTailCallOpt)
1369     return lowerTailCall(MIRBuilder, Info, OutArgs);
1370 
1371   // Find out which ABI gets to decide where things go.
1372   CCAssignFn *AssignFnFixed;
1373   CCAssignFn *AssignFnVarArg;
1374   std::tie(AssignFnFixed, AssignFnVarArg) =
1375       getAssignFnsForCC(Info.CallConv, TLI);
1376 
1377   MachineInstrBuilder CallSeqStart;
1378   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1379 
1380   // Create a temporarily-floating call instruction so we can add the implicit
1381   // uses of arg registers.
1382 
1383   unsigned Opc = 0;
1384   // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1385   // be expanded to the call, directly followed by a special marker sequence and
1386   // a call to an ObjC library function.
1387   if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
1388     Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
1389   // A call to a returns_twice function such as setjmp must be followed by
1390   // a BTI instruction.
1391   else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
1392            !Subtarget.noBTIAtReturnTwice() &&
1393            MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1394     Opc = AArch64::BLR_BTI;
1395   else {
1396     // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1397     // is set.
1398     if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1399       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
1400       DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
1401       MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
1402       Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
1403     }
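         // The callee is now a register holding the GOT entry, so getCallOpcode
         // below will pick an indirect-call opcode.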
1404     Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
1405   }
1406 
1407   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1408   unsigned CalleeOpNo = 0;
1409 
1410   if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
1411     // Add a target global address for the retainRV/claimRV runtime function
1412     // just before the call target.
1413     Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
1414     MIB.addGlobalAddress(ARCFn);
1415     ++CalleeOpNo;
1416 
1417     // We may or may not need to emit both the marker and the retain/claim call.
1418     // Tell the pseudo expansion using an additional boolean op.
1419     MIB.addImm(objcarc::attachedCallOpBundleNeedsMarker(Info.CB));
1420     ++CalleeOpNo;
1421   } else if (Info.CFIType) {
1422     MIB->setCFIType(MF, Info.CFIType->getZExtValue());
1423   }
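     // CalleeOpNo now indexes the callee operand: 2 for the RVMARKER pseudos
     // (the ARC runtime function and the marker flag come first), 0 otherwise.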
1424 
1425   MIB.add(Info.Callee);
1426 
1427   // Declare the clobber mask; it is computed and added to the call below.
1428   const uint32_t *Mask;
1429   const auto *TRI = Subtarget.getRegisterInfo();
1430 
1431   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1432                                         Subtarget, /*IsReturn*/ false);
1433   // Do the actual argument marshalling.
1434   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false);
1435   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1436                                      Info.CallConv, Info.IsVarArg))
1437     return false;
1438 
1439   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1440 
1441   if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
1442     assert((Info.PAI->Key == AArch64PACKey::IA ||
1443             Info.PAI->Key == AArch64PACKey::IB) &&
1444            "Invalid auth call key");
1445     MIB.addImm(Info.PAI->Key);
1446 
1447     Register AddrDisc = 0;
1448     uint16_t IntDisc = 0;
1449     std::tie(IntDisc, AddrDisc) =
1450         extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
1451 
1452     MIB.addImm(IntDisc);
1453     MIB.addUse(AddrDisc);
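         // As in the tail-call case, the operands from the callee onwards are
         // <callee, key, integer discriminator, address discriminator>, so the
         // address discriminator sits at CalleeOpNo + 3.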
1454     if (AddrDisc != AArch64::NoRegister) {
1455       constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1456                                *MF.getSubtarget().getRegBankInfo(), *MIB,
1457                                MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
1458                                CalleeOpNo + 3);
1459     }
1460   }
1461 
1462   // Tell the call which registers are clobbered.
1463   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1464     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1465   MIB.addRegMask(Mask);
1466 
1467   if (TRI->isAnyArgRegReserved(MF))
1468     TRI->emitReservedArgRegCallError(MF);
1469 
1470   // Now we can add the actual call instruction to the correct basic block.
1471   MIRBuilder.insertInstr(MIB);
1472 
1473   uint64_t CalleePopBytes =
1474       doesCalleeRestoreStack(Info.CallConv,
1475                              MF.getTarget().Options.GuaranteedTailCallOpt)
1476           ? alignTo(Assigner.StackSize, 16)
1477           : 0;
1478 
1479   CallSeqStart.addImm(Assigner.StackSize).addImm(0);
1480   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1481       .addImm(Assigner.StackSize)
1482       .addImm(CalleePopBytes);
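     // The second ADJCALLSTACKUP immediate is the number of bytes the callee
     // itself pops; it is nonzero only for conventions where the callee
     // restores the stack (e.g. under -tailcallopt), kept 16-byte aligned.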
1483 
1484   // If Callee is a reg, since it is used by a target-specific
1485   // instruction, it must have a register class matching the
1486   // constraint of that instruction.
1487   if (MIB->getOperand(CalleeOpNo).isReg())
1488     constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
1489                              *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
1490                              MIB->getOperand(CalleeOpNo), CalleeOpNo);
1491 
1492   // Finally we can copy the returned value back into its virtual register. In
1493   // symmetry with the arguments, the physical register must be an
1494   // implicit-def of the call instruction.
1495   if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1496     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
1497     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1498     bool UsingReturnedArg =
1499         !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
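       // When the first outgoing argument is marked 'returned', the call hands
       // the same value back, so the result can be taken from the registers we
       // just passed rather than from the normal return registers.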
1500 
1501     AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1502                                           /*IsReturn*/ false);
1503     ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1504     if (!determineAndHandleAssignments(
1505             UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
1506             MIRBuilder, Info.CallConv, Info.IsVarArg,
1507             UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
1508                              : ArrayRef<Register>()))
1509       return false;
1510   }
1511 
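     // On AArch64 the swifterror value travels in X21; record it as implicitly
     // defined by the call and copy it back into the swifterror vreg.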
1512   if (Info.SwiftErrorVReg) {
1513     MIB.addDef(AArch64::X21, RegState::Implicit);
1514     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
1515   }
1516 
1517   if (!Info.CanLowerReturn) {
1518     insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1519                     Info.DemoteRegister, Info.DemoteStackIndex);
1520   }
1521   return true;
1522 }
1523 
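     // Targets use this hook to decide whether a value may keep the 'returned'
     // parameter attribute; on AArch64 only values filling a 64-bit register
     // qualify.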
1524 bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1525   return Ty.getSizeInBits() == 64;
1526 }
1527