//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
                                             MVT &LocVT) {
  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  // hack because the DAG calls the assignment function with pre-legalized
  // register typed values, not the raw type.
  //
  // This hack is not applied to return values which are not passed on the
  // stack.
  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
    ValVT = LocVT = MVT::i8;
  else if (OrigVT == MVT::i16)
    ValVT = LocVT = MVT::i16;
}

// Account for i1/i8/i16 stack passed value hack
static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
  const MVT ValVT = VA.getValVT();
  return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
                                                 : LLT(VA.getLocVT());
}

namespace {

struct AArch64IncomingValueAssigner
    : public CallLowering::IncomingValueAssigner {
  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_)
      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
                                            LocInfo, Info, Flags, State);
  }
};

struct AArch64OutgoingValueAssigner
    : public CallLowering::OutgoingValueAssigner {
  const AArch64Subtarget &Subtarget;

  /// Track if this is used for a return instead of function argument
  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  /// stack passed returns for them and cannot apply the type adjustment.
  bool IsReturn;

  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_,
                               const AArch64Subtarget &Subtarget_,
                               bool IsReturn)
      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
        Subtarget(Subtarget_), IsReturn(IsReturn) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();

    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);

    bool Res;
    if (Info.IsFixed && !UseVarArgsCCForFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackOffset = State.getNextStackOffset();
    return Res;
  }
};

struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingValueHandler(MIRBuilder, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();

    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
  }

  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    // For pointers, we just need to fixup the integer types reported in the
    // CCValAssign.
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    LLT ValTy(VA.getValVT());
    LLT LocTy(VA.getLocVT());

    // Fixup the types for the DAG compatibility hack.
    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
      std::swap(ValTy, LocTy);
    else {
      // The calling code knows if this is a pointer or not, we're only
      // touching the LocTy for the i8/i16 hack.
      assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
      LocTy = MemTy;
    }

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingArgHandler(MIRBuilder, MRI) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

/// A special return arg handler for "returned" attribute arg calls.
struct ReturnedArgCallReturnHandler : public CallReturnHandler {
  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
                               MachineRegisterInfo &MRI,
                               MachineInstrBuilder MIB)
      : CallReturnHandler(MIRBuilder, MRI, MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {}
};

struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, bool IsTailCall = false,
                     int FPDiff = 0)
      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
        FPDiff(FPDiff),
        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      assert(!Flags.isByVal() && "byval unhandled with tail calls");

      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  /// We need to fixup the reported store size for certain value types because
  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
  /// for compatibility with the DAG call lowering implementation, which we're
  /// currently building on top of.
  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
                            Register Addr, LLT MemTy, MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    unsigned MaxSize = MemTy.getSizeInBytes() * 8;
    // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
    if (!Arg.IsFixed)
      MaxSize = 0;

    Register ValVReg = Arg.Regs[RegIndex];
    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
      MVT LocVT = VA.getLocVT();
      MVT ValVT = VA.getValVT();

      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
        std::swap(ValVT, LocVT);
        MemTy = LLT(VA.getValVT());
      }

      ValVReg = extendRegister(ValVReg, VA, MaxSize);
    } else {
      // The store does not cover the full allocated stack slot.
      MemTy = LLT(VA.getValVT());
    }

    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }

  MachineInstrBuilder MIB;

  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  const AArch64Subtarget &Subtarget;
};
} // namespace

static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return (CallConv == CallingConv::Fast && TailCallOpt) ||
         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}

bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      FunctionLoweringInfo &FLI,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                 1) {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but this can easily be done in the future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // If the split EVT was a <1 x T> vector, and NewVT is T, then we
            // don't have to do anything since we don't distinguish between the
            // two.
            if (NewLLT != MRI.getType(CurVReg)) {
              // A scalar extend.
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
            }
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
    }

    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
                                          /*IsReturn*/ true);
    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions
/// that can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
  auto &F = MF.getFunction();
  if (isa<ScalableVectorType>(F.getReturnType()))
    return true;
  if (llvm::any_of(F.args(), [](const Argument &A) {
        return isa<ScalableVectorType>(A.getType());
      }))
    return true;
  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
    return true;
  }
  return false;
}

bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;

    ArgInfo OrigArg{VRegs[i], Arg, i};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    if (Arg.hasAttribute(Attribute::SwiftAsync))
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
                                     F.getCallingConv(), F.isVarArg()))
    return false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Assigner.StackOffset;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    return false;
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
/// for CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                              CalleeAssignFnVarArg);
  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                              CallerAssignFnVarArg);

  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  LLVMContext &Ctx = CallerF.getContext();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);

  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                              Subtarget, /*IsReturn*/ false);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.IsVarArg) {
    // Be conservative and disallow variadic memory operands to match SDAG's
    // behaviour.
    // FIXME: If the caller's calling convention is C, then we can
    // potentially use its argument area. However, for cases like fastcc,
    // we can't do anything.
    for (unsigned i = 0; i < OutLocs.size(); ++i) {
      auto &ArgLoc = OutLocs[i];
      if (ArgLoc.isRegLoc())
        continue;

      LLVM_DEBUG(
          dbgs()
          << "... Cannot tail call vararg function with stack arguments\n");
      return false;
    }
  }

  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}

bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // (see X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

static const uint32_t *
getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
               AArch64CallLowering::CallLoweringInfo &Info,
               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
  const uint32_t *Mask;
  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
    // For 'this' returns, use the X0-preserving mask if applicable
    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
    if (!Mask) {
      OutArgs[0].Flags[0].setReturned(false);
      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
    }
  } else {
    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
  }
  return Mask;
}

bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
                   Info.CallConv != CallingConv::Tail &&
                   Info.CallConv != CallingConv::SwiftTail;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started
    // at a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters. This
    // will preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), Info.Callee, 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1))
      OutArgs.back().Flags[0].setZExt();
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering.
    // Instead, fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             Info.Callee, 0);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackOffset, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);

  return true;
}

bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}