//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
                                             MVT &LocVT) {
  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  // hack because the DAG calls the assignment function with pre-legalized
  // register typed values, not the raw type.
  //
  // This hack is not applied to return values which are not passed on the
  // stack.
  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
    ValVT = LocVT = MVT::i8;
  else if (OrigVT == MVT::i16)
    ValVT = LocVT = MVT::i16;
}

// Account for i1/i8/i16 stack passed value hack
static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
  const MVT ValVT = VA.getValVT();
  return (ValVT == MVT::i8 || ValVT == MVT::i16) ?
      LLT(ValVT) : LLT(VA.getLocVT());
}

namespace {

struct AArch64IncomingValueAssigner
    : public CallLowering::IncomingValueAssigner {
  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_)
      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
                                            LocInfo, Info, Flags, State);
  }
};

struct AArch64OutgoingValueAssigner
    : public CallLowering::OutgoingValueAssigner {
  const AArch64Subtarget &Subtarget;

  /// Track if this is used for a return instead of function argument
  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  /// stack passed returns for them and cannot apply the type adjustment.
  bool IsReturn;

  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_,
                               const AArch64Subtarget &Subtarget_,
                               bool IsReturn)
      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
        Subtarget(Subtarget_), IsReturn(IsReturn) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();

    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);

    bool Res;
    if (Info.IsFixed && !UseVarArgsCCForFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackOffset = State.getNextStackOffset();
    return Res;
  }
};

struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingValueHandler(MIRBuilder, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();

    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
  }

  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    // For pointers, we just need to fixup the integer types reported in the
    // CCValAssign.
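    // A pointer is recorded in the CCValAssign with an integer VT; the generic
    // handler reconstructs the actual pointer type from the argument flags, so
    // defer to it here instead of applying the i8/i16 hack below.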
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    markPhysRegUsed(PhysReg);
    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    LLT ValTy(VA.getValVT());
    LLT LocTy(VA.getLocVT());

    // Fixup the types for the DAG compatibility hack.
    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
      std::swap(ValTy, LocTy);
    else {
      // The calling code knows if this is a pointer or not, we're only
      // touching the LocTy for the i8/i16 hack.
      assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
      LocTy = MemTy;
    }

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::ZExt:
      MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
      return;
    case CCValAssign::LocInfo::SExt:
      MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
      return;
    default:
      MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
      return;
    }
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingArgHandler(MIRBuilder, MRI) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

/// A special return arg handler for "returned" attribute arg calls.
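/// Since the "returned" value lives in the same register as the forwarded
/// argument, the call already lists that register as an implicit use, so
/// markPhysRegUsed is intentionally a no-op here.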
struct ReturnedArgCallReturnHandler : public CallReturnHandler {
  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
                               MachineRegisterInfo &MRI,
                               MachineInstrBuilder MIB)
      : CallReturnHandler(MIRBuilder, MRI, MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {}
};

struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, bool IsTailCall = false,
                     int FPDiff = 0)
      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
        FPDiff(FPDiff),
        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      assert(!Flags.isByVal() && "byval unhandled with tail calls");

      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  /// We need to fixup the reported store size for certain value types because
  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
  /// for compatibility with the DAG call lowering implementation, which we're
  /// currently building on top of.
  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
                            Register Addr, LLT MemTy, MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    unsigned MaxSize = MemTy.getSizeInBytes() * 8;
    // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
    if (!Arg.IsFixed)
      MaxSize = 0;

    Register ValVReg = Arg.Regs[RegIndex];
    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
      MVT LocVT = VA.getLocVT();
      MVT ValVT = VA.getValVT();

      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
        std::swap(ValVT, LocVT);
        MemTy = LLT(VA.getValVT());
      }

      ValVReg = extendRegister(ValVReg, VA, MaxSize);
    } else {
      // The store does not cover the full allocated stack slot.
      MemTy = LLT(VA.getValVT());
    }

    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }

  MachineInstrBuilder MIB;

  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  const AArch64Subtarget &Subtarget;
};
} // namespace

static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return (CallConv == CallingConv::Fast && TailCallOpt) ||
         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}

bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      FunctionLoweringInfo &FLI,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!FLI.CanLowerReturn) {
    insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
  } else if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                 1) {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasRetAttr(Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but can easily be done in future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // If the split EVT was a <1 x T> vector, and NewVT is T, then we
            // don't have to do anything since we don't distinguish between the
            // two.
            if (NewLLT != MRI.getType(CurVReg)) {
              // A scalar extend.
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
            }
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
    }

    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
                                          /*IsReturn*/ true);
    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
                                         CallingConv::ID CallConv,
                                         SmallVectorImpl<BaseArgInfo> &Outs,
                                         bool IsVarArg) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  const auto &TLI = *getTLI<AArch64TargetLowering>();
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());

  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
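///
/// A variadic function containing a musttail call must keep any argument
/// registers not consumed by its own fixed arguments available for the
/// forwarding call, so copies into virtual registers are built here and
/// re-attached to the call later in lowerTailCall.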
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
  auto &F = MF.getFunction();
  if (isa<ScalableVectorType>(F.getReturnType()))
    return true;
  if (llvm::any_of(F.args(), [](const Argument &A) {
        return isa<ScalableVectorType>(A.getType());
      }))
    return true;
  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
    return true;
  }
  return false;
}

bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  SmallVector<std::pair<Register, Register>> BoolArgs;

  // Insert the hidden sret parameter if the return value won't fit in the
  // return registers.
  if (!FLI.CanLowerReturn)
    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);

  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;

    ArgInfo OrigArg{VRegs[i], Arg, i};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    // i1 arguments are zero-extended to i8 by the caller. Emit a
    // hint to reflect this.
    if (OrigArg.Ty->isIntegerTy(1)) {
      assert(OrigArg.Regs.size() == 1 &&
             MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      if (!OrigArg.Flags[0].isZExt()) {
        // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
        Register OrigReg = OrigArg.Regs[0];
        Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
        OrigArg.Regs[0] = WideReg;
        BoolArgs.push_back({OrigReg, WideReg});
      }
    }

    if (Arg.hasAttribute(Attribute::SwiftAsync))
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
                                     F.getCallingConv(), F.isVarArg()))
    return false;

  if (!BoolArgs.empty()) {
    for (auto &KV : BoolArgs) {
      Register OrigReg = KV.first;
      Register WideReg = KV.second;
      LLT WideTy = MRI.getType(WideReg);
      assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
             "Unexpected bit size of a bool arg");
      MIRBuilder.buildTrunc(
          OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
    }
  }

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Assigner.StackOffset;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
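/// That is tailcc and swifttailcc always, and fastcc only when -tailcallopt
/// is in effect.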
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    return false;
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
/// for CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                              CalleeAssignFnVarArg);
  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                              CallerAssignFnVarArg);

  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  LLVMContext &Ctx = CallerF.getContext();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);

  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                              Subtarget, /*IsReturn*/ false);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.IsVarArg) {
    // Be conservative and disallow variadic memory operands to match SDAG's
    // behaviour.
    // FIXME: If the caller's calling convention is C, then we can
    // potentially use its argument area. However, for cases like fastcc,
    // we can't do anything.
    for (unsigned i = 0; i < OutLocs.size(); ++i) {
      auto &ArgLoc = OutLocs[i];
      if (ArgLoc.isRegLoc())
        continue;

      LLVM_DEBUG(
          dbgs()
          << "... Cannot tail call vararg function with stack arguments\n");
      return false;
    }
  }

  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}

bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call.
  // Working around this *is* possible (see X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
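  // (A "BTI c" landing pad accepts calls, but an indirect tail call is a
  // branch, which is only permitted when it comes through x16 or x17.)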
  if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

static const uint32_t *
getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
               AArch64CallLowering::CallLoweringInfo &Info,
               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
  const uint32_t *Mask;
  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
    // For 'this' returns, use the X0-preserving mask if applicable
    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
    if (!Mask) {
      OutArgs[0].Flags[0].setReturned(false);
      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
    }
  } else {
    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
  }
  return Mask;
}

bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
                   Info.CallConv != CallingConv::Tail &&
                   Info.CallConv != CallingConv::SwiftTail;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started
    // at a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters.
    // This will preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (MIB->getOperand(0).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), MIB->getOperand(0), 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1)) {
      ArgInfo &OutArg = OutArgs.back();
      assert(OutArg.Regs.size() == 1 &&
             MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      // We cannot use a ZExt ArgInfo flag here, because it will
      // zero-extend the argument to i32 instead of just i8.
      OutArg.Regs[0] =
          MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
      LLVMContext &Ctx = MF.getFunction().getContext();
      OutArg.Ty = Type::getInt8Ty(Ctx);
    }
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned Opc = 0;
  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
  // be expanded to the call, directly followed by a special marker sequence and
  // a call to an ObjC library function.
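  // The BLR_RVMARKER pseudo keeps the call and that trailing sequence together
  // until it is expanded late; the attached ObjC runtime function becomes an
  // extra operand on the pseudo (added below, just before the call target).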
  if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
    Opc = AArch64::BLR_RVMARKER;
  // A call to a returns twice function like setjmp must be followed by a bti
  // instruction.
  else if (Info.CB &&
           Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
           !Subtarget.noBTIAtReturnTwice() &&
           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    Opc = AArch64::BLR_BTI;
  else
    Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  unsigned CalleeOpNo = 0;

  if (Opc == AArch64::BLR_RVMARKER) {
    // Add a target global address for the retainRV/claimRV runtime function
    // just before the call target.
    Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
    MIB.addGlobalAddress(ARCFn);
    ++CalleeOpNo;
  }

  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (MIB->getOperand(CalleeOpNo).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             MIB->getOperand(CalleeOpNo), CalleeOpNo);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ?
          alignTo(Assigner.StackOffset, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);

  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }
  return true;
}

bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}