//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"

using namespace llvm;

namespace {

/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
                                    Register ValVReg, const CCValAssign &VA) {
  if (VA.getLocVT().getSizeInBits() < 32) {
    // 16-bit types are reported as legal for 32-bit registers. We need to
    // extend and do a 32-bit copy to avoid the verifier complaining about it.
    return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
  }

  return Handler.extendRegister(ValVReg, VA);
}

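// Value handler for outgoing return values: each value is copied into its
// assigned physical register, and that register is attached to the return
// instruction as an implicit use.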
struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
  AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             MachineInstrBuilder MIB)
      : OutgoingValueHandler(B, MRI), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            const MachinePointerInfo &MPO,
                            const CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        const CCValAssign &VA) override {
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the value
    // ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      LLT Ty = MRI.getType(ExtReg);
      LLT S32 = LLT::scalar(32);
      if (Ty != S32) {
        // FIXME: We should probably support readfirstlane intrinsics with all
        // legal 32-bit types.
        assert(Ty.getSizeInBits() == 32);
        if (Ty.isPointer())
          ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
        else
          ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
      }

      auto ToSGPR = MIRBuilder
                        .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                        {MRI.getType(ExtReg)})
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }
};

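// Common handler for incoming values (formal arguments and call results):
// register-passed values are copied out of their physical registers, and
// stack-passed values are loaded from fixed frame objects.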
struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
  uint64_t StackUsed = 0;

  AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : IncomingValueHandler(B, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();
    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        const CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

      // If we have signext/zeroext, it applies to the whole 32-bit register
      // before truncation.
      auto Extended =
          buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
      MIRBuilder.buildTrunc(ValVReg, Extended);
      return;
    }

    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            const MachinePointerInfo &MPO,
                            const CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call instruction).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};

struct FormalArgHandler : public AMDGPUIncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : AMDGPUIncomingArgHandler(B, MRI) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public AMDGPUIncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

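// Handler for outgoing call arguments. Register arguments are copied into
// their assigned physical registers; stack arguments are stored relative to
// the stack pointer, or into fixed frame objects for tail calls.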
struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  bool IsTailCall;

  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
                           bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
        IsTailCall(IsTailCall) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
    const LLT S32 = LLT::scalar(32);

    if (IsTailCall) {
      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

    if (!SPReg) {
      const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
      if (ST.enableFlatScratch()) {
        // The stack is accessed unswizzled, so we can use a regular copy.
        SPReg = MIRBuilder.buildCopy(PtrTy,
                                     MFI->getStackPtrOffsetReg()).getReg(0);
      } else {
        // The address we produce here, without knowing the use context, is going
        // to be interpreted as a vector address, so we need to convert to a
        // swizzled address.
        SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
                                      {MFI->getStackPtrOffsetReg()}).getReg(0);
      }
    }

    auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        const CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            const MachinePointerInfo &MPO,
                            const CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    uint64_t LocMemOffset = VA.getLocMemOffset();
    const auto &ST = MF.getSubtarget<GCNSubtarget>();

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOStore, MemTy,
        commonAlignment(ST.getStackAlignment(), LocMemOffset));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
                            unsigned ValRegIndex, Register Addr, LLT MemTy,
                            const MachinePointerInfo &MPO,
                            const CCValAssign &VA) override {
    Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                           ? extendRegister(Arg.Regs[ValRegIndex], VA)
                           : Arg.Regs[ValRegIndex];
    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }
};
} // anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
                                        CallingConv::ID CallConv,
                                        SmallVectorImpl<BaseArgInfo> &Outs,
                                        bool IsVarArg) const {
  // For shaders. Vector types should be explicitly handled by CC.
  if (AMDGPU::isEntryFunctionCC(CallConv))
    return true;

  SmallVector<CCValAssign, 16> ArgLocs;
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());

  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();
  LLVMContext &Ctx = F.getContext();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  SmallVector<EVT, 8> SplitEVTs;
  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
  assert(VRegs.size() == SplitEVTs.size() &&
         "For each split Type there should be exactly one VReg.");

  SmallVector<ArgInfo, 8> SplitRetInfos;

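  // Promote each scalar integer return component to the type the ABI returns
  // it in, then split it into legal pieces before assignment.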
  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
    EVT VT = SplitEVTs[i];
    Register Reg = VRegs[i];
    ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx), 0);
    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);

    if (VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (RetInfo.Flags[0].isSExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (RetInfo.Flags[0].isZExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
        LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
        Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
      }
    }

    if (Reg != RetInfo.Regs[0]) {
      RetInfo.Regs[0] = Reg;
      // Reset the arg flags after modifying Reg.
      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
    }

    splitToValueTypes(RetInfo, SplitRetInfos, DL, CC);
  }

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

  OutgoingValueAssigner Assigner(AssignFn);
  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
  return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
                                       CC, F.isVarArg());
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
                                     ArrayRef<Register> VRegs,
                                     FunctionLoweringInfo &FLI) const {

  MachineFunction &MF = B.getMF();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd =
      (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
  auto Ret = B.buildInstrNoInsert(ReturnOpc);

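  // If the return value was demoted to a hidden sret pointer, store it through
  // that pointer instead of returning it in registers.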
  if (!FLI.CanLowerReturn)
    insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
  else if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

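// Build a pointer to the kernel argument at byte offset Offset from the
// kernarg segment base pointer.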
void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
                                           uint64_t Offset) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
}

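// Load a kernel argument from the kernarg segment: the argument is split into
// legal pieces and each piece is loaded from its own field offset.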
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
                                        uint64_t Offset,
                                        Align Alignment) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);

  LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);

  SmallVector<ArgInfo, 32> SplitArgs;
  SmallVector<uint64_t> FieldOffsets;
  splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);

  unsigned Idx = 0;
  for (ArgInfo &SplitArg : SplitArgs) {
    Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
    lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

    LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
    if (SplitArg.Flags[0].isPointer()) {
      // Compensate for losing pointeriness in splitValueTypes.
      LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
                               ArgTy.getScalarSizeInBits());
      ArgTy = ArgTy.isVector() ? LLT::vector(ArgTy.getElementCount(), PtrTy)
                               : PtrTy;
    }

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));

    assert(SplitArg.Regs.size() == 1);

    B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
    ++Idx;
  }
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
  if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (UserSGPRInfo.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  const Module *M = MF.getFunction().getParent();
  if (UserSGPRInfo.hasQueuePtr() &&
      AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (UserSGPRInfo.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (UserSGPRInfo.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (UserSGPRInfo.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

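// Kernel arguments are not passed in registers; each one is loaded explicitly
// from the kernarg segment at an offset computed from the kernel signature.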
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  const DataLayout &DL = F.getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const Align KernArgBaseAlign(16);
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset();
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
    Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    if (Arg.use_empty()) {
      ++i;
      continue;
    }

    Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);

    if (IsByRef) {
      unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

      assert(VRegs[i].size() == 1 &&
             "expected only one register for byval pointers");
      if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
        lowerParameterPtr(VRegs[i][0], B, ArgOffset);
      } else {
        const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
        Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
        lowerParameterPtr(PtrReg, B, ArgOffset);

        B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
      }
    } else {
      ArgInfo OrigArg(VRegs[i], Arg, i);
      const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
      setArgFlags(OrigArg, OrigArgIdx, DL, F);
      lowerParameter(B, OrigArg, ArgOffset, Alignment);
    }

    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
    FunctionLoweringInfo &FLI) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsGraphics = AMDGPU::isGraphics(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
  const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();

  if (UserSGPRInfo.hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  // FIXME: This probably isn't defined for mesa
  if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
    Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  // Insert the hidden sret parameter if the return value won't fit in the
  // return registers.
  if (!FLI.CanLowerReturn)
    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

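    // Unused pixel shader inputs that were not pre-allocated can be skipped
    // entirely; their vregs are simply filled with undef.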
    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (Register R : VRegs[Idx])
          B.buildUndef(R);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg, Idx);
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(OrigArg, SplitArgs, DL, CC);
    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  if (!IsEntryFunc && !IsGraphics) {
    // For the fixed ABI, pass workitem IDs in the last argument register.
    TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);

    if (!Subtarget.enableFlatScratch())
      CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

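  // First compute the register/stack assignments for all split arguments, then
  // emit the actual copies and loads through FormalArgHandler.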
  IncomingValueAssigner Assigner(AssignFn);
  if (!determineAssignments(Assigner, SplitArgs, CCInfo))
    return false;

  FormalArgHandler Handler(B, MRI);
  if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
    return false;

  uint64_t StackSize = Assigner.StackSize;

  // Start adding system SGPRs.
  if (IsEntryFunc)
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);

  // When we tail call, we need to check if the callee's arguments will fit on
  // the caller's stack. So, whenever we lower formal arguments, we should keep
  // track of this information, since we might lower a tail call in this
  // function later.
  Info->setBytesInStackArgArea(StackSize);

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}

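// Lower the implicit ABI inputs (dispatch/queue pointers, workgroup IDs, etc.)
// that the callee expects, copying them from the caller's own preloaded inputs
// where available.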
bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
    CCState &CCInfo,
    SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();

  // If there's no call site, this doesn't correspond to a call from the IR and
  // doesn't need implicit inputs.
  if (!Info.CB)
    return true;

  const AMDGPUFunctionArgInfo *CalleeArgInfo
      = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();

  // TODO: Unify with private memory register handling. This is complicated by
  // the fact that at least in kernels, the input argument is not necessarily
  // in the same location as the input.
  AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
    AMDGPUFunctionArgInfo::DISPATCH_PTR,
    AMDGPUFunctionArgInfo::QUEUE_PTR,
    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
    AMDGPUFunctionArgInfo::DISPATCH_ID,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
    AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
  };

  static constexpr StringLiteral ImplicitAttrNames[] = {
    "amdgpu-no-dispatch-ptr",
    "amdgpu-no-queue-ptr",
    "amdgpu-no-implicitarg-ptr",
    "amdgpu-no-dispatch-id",
    "amdgpu-no-workgroup-id-x",
    "amdgpu-no-workgroup-id-y",
    "amdgpu-no-workgroup-id-z",
    "amdgpu-no-lds-kernel-id",
  };

  MachineRegisterInfo &MRI = MF.getRegInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
      = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());

  unsigned I = 0;
  for (auto InputID : InputRegs) {
    const ArgDescriptor *OutgoingArg;
    const TargetRegisterClass *ArgRC;
    LLT ArgTy;

    // If the callee does not use the attribute value, skip copying the value.
    if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
      continue;

    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(InputID);
    if (!OutgoingArg)
      continue;

    const ArgDescriptor *IncomingArg;
    const TargetRegisterClass *IncomingArgRC;
    std::tie(IncomingArg, IncomingArgRC, ArgTy) =
        CallerArgInfo.getPreloadedValue(InputID);
    assert(IncomingArgRC == ArgRC);

    Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

    if (IncomingArg) {
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
    } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
      LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
    } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
      std::optional<uint32_t> Id =
          AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
      if (Id) {
        MIRBuilder.buildConstant(InputReg, *Id);
      } else {
        MIRBuilder.buildUndef(InputReg);
      }
    } else {
      // We may have proven the input wasn't needed, although the ABI is
      // requiring it. We just need to allocate the register appropriately.
      MIRBuilder.buildUndef(InputReg);
    }

    if (OutgoingArg->isRegister()) {
      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
      if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
        report_fatal_error("failed to allocate implicit input argument");
    } else {
      LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
      return false;
    }
  }

  // Pack workitem IDs into a single register or pass it as is if already
  // packed.
  const ArgDescriptor *OutgoingArg;
  const TargetRegisterClass *ArgRC;
  LLT ArgTy;

  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
  if (!OutgoingArg)
    return false;

  auto WorkitemIDX =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  auto WorkitemIDY =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  auto WorkitemIDZ =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);

  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
  const LLT S32 = LLT::scalar(32);

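  // The amdgpu-no-workitem-id-* attributes on the call tell us which packed
  // workitem ID components the callee actually needs.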
  const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
  const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
  const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

  // If incoming ids are not packed we need to pack them.
  // FIXME: Should consider known workgroup size to eliminate known 0 cases.
  Register InputReg;
  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
      NeedWorkItemIDX) {
    if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
      InputReg = MRI.createGenericVirtualRegister(S32);
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                         std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
    } else {
      InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
    }
  }

  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
      NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
    Register Y = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                       std::get<2>(WorkitemIDY));

    Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
  }

  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
      NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
    Register Z = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                       std::get<2>(WorkitemIDZ));

    Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
  }

  if (!InputReg &&
      (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
    InputReg = MRI.createGenericVirtualRegister(S32);
    if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
92104eeddc0SDimitry Andric // We're in a situation where the outgoing function requires the workitem
92204eeddc0SDimitry Andric // ID, but the calling function does not have it (e.g. a graphics function
92304eeddc0SDimitry Andric // calling a C calling convention function). This is illegal, but we need
92404eeddc0SDimitry Andric // to produce something.
92504eeddc0SDimitry Andric MIRBuilder.buildUndef(InputReg);
92604eeddc0SDimitry Andric } else {
927e8d8bef9SDimitry Andric // Workitem IDs are already packed; any of the present incoming arguments
928e8d8bef9SDimitry Andric // will carry all the required fields.
929e8d8bef9SDimitry Andric ArgDescriptor IncomingArg = ArgDescriptor::createArg(
930e8d8bef9SDimitry Andric IncomingArgX ? *IncomingArgX :
931e8d8bef9SDimitry Andric IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
932e8d8bef9SDimitry Andric LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
933e8d8bef9SDimitry Andric &AMDGPU::VGPR_32RegClass, S32);
934e8d8bef9SDimitry Andric }
93504eeddc0SDimitry Andric }
936e8d8bef9SDimitry Andric
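// Hand the packed value to the callee. Implicit inputs are only supported in
// registers here; a stack-passed implicit input aborts the lowering.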
937e8d8bef9SDimitry Andric if (OutgoingArg->isRegister()) {
938349cc55cSDimitry Andric if (InputReg)
939e8d8bef9SDimitry Andric ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
940349cc55cSDimitry Andric
941e8d8bef9SDimitry Andric if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
942e8d8bef9SDimitry Andric report_fatal_error("failed to allocate implicit input argument");
943e8d8bef9SDimitry Andric } else {
944e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
945e8d8bef9SDimitry Andric return false;
946e8d8bef9SDimitry Andric }
947e8d8bef9SDimitry Andric
948e8d8bef9SDimitry Andric return true;
949e8d8bef9SDimitry Andric }
950e8d8bef9SDimitry Andric
951e8d8bef9SDimitry Andric /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
952e8d8bef9SDimitry Andric /// CC.
953e8d8bef9SDimitry Andric static std::pair<CCAssignFn *, CCAssignFn *>
954e8d8bef9SDimitry Andric getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
955e8d8bef9SDimitry Andric return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
956e8d8bef9SDimitry Andric }
957e8d8bef9SDimitry Andric
958e8d8bef9SDimitry Andric static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
9595f757f3fSDimitry Andric bool IsTailCall, bool isWave32,
9605f757f3fSDimitry Andric CallingConv::ID CC) {
9615f757f3fSDimitry Andric // For calls to amdgpu_cs_chain functions, the address is known to be uniform.
9625f757f3fSDimitry Andric assert((AMDGPU::isChainCC(CC) || !IsIndirect || !IsTailCall) &&
9635f757f3fSDimitry Andric "Indirect calls can't be tail calls, "
964349cc55cSDimitry Andric "because the address can be divergent");
96506c3fb27SDimitry Andric if (!IsTailCall)
96606c3fb27SDimitry Andric return AMDGPU::G_SI_CALL;
96706c3fb27SDimitry Andric
9685f757f3fSDimitry Andric if (AMDGPU::isChainCC(CC))
9695f757f3fSDimitry Andric return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
9705f757f3fSDimitry Andric
97106c3fb27SDimitry Andric return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
97206c3fb27SDimitry Andric AMDGPU::SI_TCRETURN;
973e8d8bef9SDimitry Andric }
974e8d8bef9SDimitry Andric
975e8d8bef9SDimitry Andric // Add operands to the call instruction to track the callee.
976e8d8bef9SDimitry Andric static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
977e8d8bef9SDimitry Andric MachineIRBuilder &MIRBuilder,
978e8d8bef9SDimitry Andric AMDGPUCallLowering::CallLoweringInfo &Info) {
979e8d8bef9SDimitry Andric if (Info.Callee.isReg()) {
980e8d8bef9SDimitry Andric CallInst.addReg(Info.Callee.getReg());
981e8d8bef9SDimitry Andric CallInst.addImm(0);
982e8d8bef9SDimitry Andric } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
983e8d8bef9SDimitry Andric // The call lowering lightly assumed we can directly encode a call target in
984e8d8bef9SDimitry Andric // the instruction, which is not the case. Materialize the address here.
985e8d8bef9SDimitry Andric const GlobalValue *GV = Info.Callee.getGlobal();
986e8d8bef9SDimitry Andric auto Ptr = MIRBuilder.buildGlobalValue(
987e8d8bef9SDimitry Andric LLT::pointer(GV->getAddressSpace(), 64), GV);
988e8d8bef9SDimitry Andric CallInst.addReg(Ptr.getReg(0));
989e8d8bef9SDimitry Andric CallInst.add(Info.Callee);
990e8d8bef9SDimitry Andric } else
991e8d8bef9SDimitry Andric return false;
992e8d8bef9SDimitry Andric
993e8d8bef9SDimitry Andric return true;
994e8d8bef9SDimitry Andric }
995e8d8bef9SDimitry Andric
996fe6060f1SDimitry Andric bool AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay(
997fe6060f1SDimitry Andric CallLoweringInfo &Info, MachineFunction &MF,
998fe6060f1SDimitry Andric SmallVectorImpl<ArgInfo> &InArgs) const {
999fe6060f1SDimitry Andric const Function &CallerF = MF.getFunction();
1000fe6060f1SDimitry Andric CallingConv::ID CalleeCC = Info.CallConv;
1001fe6060f1SDimitry Andric CallingConv::ID CallerCC = CallerF.getCallingConv();
1002fe6060f1SDimitry Andric
1003fe6060f1SDimitry Andric // If the calling conventions match, then everything must be the same.
1004fe6060f1SDimitry Andric if (CalleeCC == CallerCC)
1005fe6060f1SDimitry Andric return true;
1006fe6060f1SDimitry Andric
1007fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1008fe6060f1SDimitry Andric
1009fe6060f1SDimitry Andric // Make sure that the caller and callee preserve all of the same registers.
1010fe6060f1SDimitry Andric auto TRI = ST.getRegisterInfo();
1011fe6060f1SDimitry Andric
1012fe6060f1SDimitry Andric const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1013fe6060f1SDimitry Andric const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1014fe6060f1SDimitry Andric if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1015fe6060f1SDimitry Andric return false;
1016fe6060f1SDimitry Andric
1017fe6060f1SDimitry Andric // Check if the caller and callee will handle arguments in the same way.
1018fe6060f1SDimitry Andric const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1019fe6060f1SDimitry Andric CCAssignFn *CalleeAssignFnFixed;
1020fe6060f1SDimitry Andric CCAssignFn *CalleeAssignFnVarArg;
1021fe6060f1SDimitry Andric std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1022fe6060f1SDimitry Andric getAssignFnsForCC(CalleeCC, TLI);
1023fe6060f1SDimitry Andric
1024fe6060f1SDimitry Andric CCAssignFn *CallerAssignFnFixed;
1025fe6060f1SDimitry Andric CCAssignFn *CallerAssignFnVarArg;
1026fe6060f1SDimitry Andric std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1027fe6060f1SDimitry Andric getAssignFnsForCC(CallerCC, TLI);
1028fe6060f1SDimitry Andric
1029fe6060f1SDimitry Andric // FIXME: We are not accounting for potential differences in implicitly passed
1030fe6060f1SDimitry Andric // inputs, but only the fixed ABI is supported now anyway.
1031fe6060f1SDimitry Andric IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
1032fe6060f1SDimitry Andric CalleeAssignFnVarArg);
1033fe6060f1SDimitry Andric IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
1034fe6060f1SDimitry Andric CallerAssignFnVarArg);
1035fe6060f1SDimitry Andric return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
1036fe6060f1SDimitry Andric }
1037fe6060f1SDimitry Andric
1038fe6060f1SDimitry Andric bool AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable(
1039fe6060f1SDimitry Andric CallLoweringInfo &Info, MachineFunction &MF,
1040fe6060f1SDimitry Andric SmallVectorImpl<ArgInfo> &OutArgs) const {
1041fe6060f1SDimitry Andric // If there are no outgoing arguments, then we are done.
1042fe6060f1SDimitry Andric if (OutArgs.empty())
1043fe6060f1SDimitry Andric return true;
1044fe6060f1SDimitry Andric
1045fe6060f1SDimitry Andric const Function &CallerF = MF.getFunction();
1046fe6060f1SDimitry Andric CallingConv::ID CalleeCC = Info.CallConv;
1047fe6060f1SDimitry Andric CallingConv::ID CallerCC = CallerF.getCallingConv();
1048fe6060f1SDimitry Andric const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1049fe6060f1SDimitry Andric
1050fe6060f1SDimitry Andric CCAssignFn *AssignFnFixed;
1051fe6060f1SDimitry Andric CCAssignFn *AssignFnVarArg;
1052fe6060f1SDimitry Andric std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1053fe6060f1SDimitry Andric
1054fe6060f1SDimitry Andric // We have outgoing arguments. Make sure that we can tail call with them.
1055fe6060f1SDimitry Andric SmallVector<CCValAssign, 16> OutLocs;
1056fe6060f1SDimitry Andric CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
1057fe6060f1SDimitry Andric OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1058fe6060f1SDimitry Andric
1059fe6060f1SDimitry Andric if (!determineAssignments(Assigner, OutArgs, OutInfo)) {
1060fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
1061fe6060f1SDimitry Andric return false;
1062fe6060f1SDimitry Andric }
1063fe6060f1SDimitry Andric
1064fe6060f1SDimitry Andric // Make sure that they can fit on the caller's stack.
1065fe6060f1SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
106606c3fb27SDimitry Andric if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
1067fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
1068fe6060f1SDimitry Andric return false;
1069fe6060f1SDimitry Andric }
1070fe6060f1SDimitry Andric
1071fe6060f1SDimitry Andric // Verify that the parameters in callee-saved registers match.
1072fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1073fe6060f1SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
1074fe6060f1SDimitry Andric const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
1075fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
1076fe6060f1SDimitry Andric return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
1077fe6060f1SDimitry Andric }
1078fe6060f1SDimitry Andric
1079fe6060f1SDimitry Andric /// Return true if the calling convention is one that we can guarantee TCO for.
1080fe6060f1SDimitry Andric static bool canGuaranteeTCO(CallingConv::ID CC) {
1081fe6060f1SDimitry Andric return CC == CallingConv::Fast;
1082fe6060f1SDimitry Andric }
1083fe6060f1SDimitry Andric
1084fe6060f1SDimitry Andric /// Return true if we might ever do TCO for calls with this calling convention.
1085fe6060f1SDimitry Andric static bool mayTailCallThisCC(CallingConv::ID CC) {
1086fe6060f1SDimitry Andric switch (CC) {
1087fe6060f1SDimitry Andric case CallingConv::C:
1088fe6060f1SDimitry Andric case CallingConv::AMDGPU_Gfx:
1089fe6060f1SDimitry Andric return true;
1090fe6060f1SDimitry Andric default:
1091fe6060f1SDimitry Andric return canGuaranteeTCO(CC);
1092fe6060f1SDimitry Andric }
1093fe6060f1SDimitry Andric }
1094fe6060f1SDimitry Andric
1095fe6060f1SDimitry Andric bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
1096fe6060f1SDimitry Andric MachineIRBuilder &B, CallLoweringInfo &Info,
1097fe6060f1SDimitry Andric SmallVectorImpl<ArgInfo> &InArgs, SmallVectorImpl<ArgInfo> &OutArgs) const {
1098fe6060f1SDimitry Andric // Must pass all target-independent checks in order to tail call optimize.
1099fe6060f1SDimitry Andric if (!Info.IsTailCall)
1100fe6060f1SDimitry Andric return false;
1101fe6060f1SDimitry Andric
1102349cc55cSDimitry Andric // Indirect calls can't be tail calls, because the address can be divergent.
1103349cc55cSDimitry Andric // TODO: Check divergence info to see whether the call address really is divergent.
1104349cc55cSDimitry Andric if (Info.Callee.isReg())
1105349cc55cSDimitry Andric return false;
1106349cc55cSDimitry Andric
1107fe6060f1SDimitry Andric MachineFunction &MF = B.getMF();
1108fe6060f1SDimitry Andric const Function &CallerF = MF.getFunction();
1109fe6060f1SDimitry Andric CallingConv::ID CalleeCC = Info.CallConv;
1110fe6060f1SDimitry Andric CallingConv::ID CallerCC = CallerF.getCallingConv();
1111fe6060f1SDimitry Andric
1112fe6060f1SDimitry Andric const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1113fe6060f1SDimitry Andric const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1114fe6060f1SDimitry Andric // Kernels aren't callable, and don't have a live-in return address, so it
1115fe6060f1SDimitry Andric // doesn't make sense to do a tail call with entry functions.
1116fe6060f1SDimitry Andric if (!CallerPreserved)
1117fe6060f1SDimitry Andric return false;
1118fe6060f1SDimitry Andric
1119fe6060f1SDimitry Andric if (!mayTailCallThisCC(CalleeCC)) {
1120fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1121fe6060f1SDimitry Andric return false;
1122fe6060f1SDimitry Andric }
1123fe6060f1SDimitry Andric
1124fe6060f1SDimitry Andric if (any_of(CallerF.args(), [](const Argument &A) {
1125fe6060f1SDimitry Andric return A.hasByValAttr() || A.hasSwiftErrorAttr();
1126fe6060f1SDimitry Andric })) {
1127fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
1128fe6060f1SDimitry Andric "or swifterror arguments\n");
1129fe6060f1SDimitry Andric return false;
1130fe6060f1SDimitry Andric }
1131fe6060f1SDimitry Andric
1132fe6060f1SDimitry Andric // If we have -tailcallopt, then we're done.
1133fe6060f1SDimitry Andric if (MF.getTarget().Options.GuaranteedTailCallOpt)
1134fe6060f1SDimitry Andric return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
1135fe6060f1SDimitry Andric
1136fe6060f1SDimitry Andric // Verify that the incoming and outgoing arguments from the callee are
1137fe6060f1SDimitry Andric // safe to tail call.
1138fe6060f1SDimitry Andric if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1139fe6060f1SDimitry Andric LLVM_DEBUG(
1140fe6060f1SDimitry Andric dbgs()
1141fe6060f1SDimitry Andric << "... Caller and callee have incompatible calling conventions.\n");
1142fe6060f1SDimitry Andric return false;
1143fe6060f1SDimitry Andric }
1144fe6060f1SDimitry Andric
1145fe6060f1SDimitry Andric if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1146fe6060f1SDimitry Andric return false;
1147fe6060f1SDimitry Andric
1148fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
1149fe6060f1SDimitry Andric return true;
1150fe6060f1SDimitry Andric }
1151fe6060f1SDimitry Andric
1152fe6060f1SDimitry Andric // Insert outgoing implicit arguments for a call, by inserting copies to the
1153fe6060f1SDimitry Andric // implicit argument registers and adding the necessary implicit uses to the
1154fe6060f1SDimitry Andric // call instruction.
1155fe6060f1SDimitry Andric void AMDGPUCallLowering::handleImplicitCallArguments(
1156fe6060f1SDimitry Andric MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
1157fe6060f1SDimitry Andric const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
11585f757f3fSDimitry Andric CallingConv::ID CalleeCC,
1159fe6060f1SDimitry Andric ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
1160fe6060f1SDimitry Andric if (!ST.enableFlatScratch()) {
1161fe6060f1SDimitry Andric // Insert copies for the SRD (scratch resource descriptor). In the HSA case,
1162fe6060f1SDimitry Andric // this should be an identity copy.
1163fe6060f1SDimitry Andric auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
1164fe6060f1SDimitry Andric FuncInfo.getScratchRSrcReg());
11655f757f3fSDimitry Andric
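// Chain functions expect the scratch resource descriptor in s[48:51]; all
// other callees take it in s[0:3].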
11665f757f3fSDimitry Andric auto CalleeRSrcReg = AMDGPU::isChainCC(CalleeCC)
11675f757f3fSDimitry Andric ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
11685f757f3fSDimitry Andric : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
11695f757f3fSDimitry Andric
11705f757f3fSDimitry Andric MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
11715f757f3fSDimitry Andric CallInst.addReg(CalleeRSrcReg, RegState::Implicit);
1172fe6060f1SDimitry Andric }
1173fe6060f1SDimitry Andric
1174fe6060f1SDimitry Andric for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1175fe6060f1SDimitry Andric MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
1176fe6060f1SDimitry Andric CallInst.addReg(ArgReg.first, RegState::Implicit);
1177fe6060f1SDimitry Andric }
1178fe6060f1SDimitry Andric }
1179fe6060f1SDimitry Andric
1180fe6060f1SDimitry Andric bool AMDGPUCallLowering::lowerTailCall(
1181fe6060f1SDimitry Andric MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1182fe6060f1SDimitry Andric SmallVectorImpl<ArgInfo> &OutArgs) const {
1183fe6060f1SDimitry Andric MachineFunction &MF = MIRBuilder.getMF();
1184fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1185fe6060f1SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1186fe6060f1SDimitry Andric const Function &F = MF.getFunction();
1187fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
1188fe6060f1SDimitry Andric const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1189fe6060f1SDimitry Andric
1190fe6060f1SDimitry Andric // True when we're tail calling, but without -tailcallopt.
1191fe6060f1SDimitry Andric bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
1192fe6060f1SDimitry Andric
1193fe6060f1SDimitry Andric // Find out which ABI gets to decide where things go.
1194fe6060f1SDimitry Andric CallingConv::ID CalleeCC = Info.CallConv;
1195fe6060f1SDimitry Andric CCAssignFn *AssignFnFixed;
1196fe6060f1SDimitry Andric CCAssignFn *AssignFnVarArg;
1197fe6060f1SDimitry Andric std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1198fe6060f1SDimitry Andric
1199fe6060f1SDimitry Andric MachineInstrBuilder CallSeqStart;
1200fe6060f1SDimitry Andric if (!IsSibCall)
1201fe6060f1SDimitry Andric CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
1202fe6060f1SDimitry Andric
12035f757f3fSDimitry Andric unsigned Opc =
12045f757f3fSDimitry Andric getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
1205fe6060f1SDimitry Andric auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1206fe6060f1SDimitry Andric if (!addCallTargetOperands(MIB, MIRBuilder, Info))
1207fe6060f1SDimitry Andric return false;
1208fe6060f1SDimitry Andric
1209fe6060f1SDimitry Andric // Byte offset for the tail call. When we are sibcalling, this will always
1210fe6060f1SDimitry Andric // be 0.
1211fe6060f1SDimitry Andric MIB.addImm(0);
1212fe6060f1SDimitry Andric
12135f757f3fSDimitry Andric // If this is a chain call, we need to pass in the EXEC mask.
1214fe6060f1SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
12155f757f3fSDimitry Andric if (AMDGPU::isChainCC(Info.CallConv)) {
12165f757f3fSDimitry Andric ArgInfo ExecArg = Info.OrigArgs[1];
12175f757f3fSDimitry Andric assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");
12185f757f3fSDimitry Andric
12195f757f3fSDimitry Andric if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize()))
12205f757f3fSDimitry Andric return false;
12215f757f3fSDimitry Andric
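// A compile-time constant EXEC mask is encoded as an immediate; otherwise it
// is passed in a register constrained to the class the pseudo expects.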
12225f757f3fSDimitry Andric if (auto CI = dyn_cast<ConstantInt>(ExecArg.OrigValue)) {
12235f757f3fSDimitry Andric MIB.addImm(CI->getSExtValue());
12245f757f3fSDimitry Andric } else {
12255f757f3fSDimitry Andric MIB.addReg(ExecArg.Regs[0]);
12265f757f3fSDimitry Andric unsigned Idx = MIB->getNumOperands() - 1;
12275f757f3fSDimitry Andric MIB->getOperand(Idx).setReg(constrainOperandRegClass(
12285f757f3fSDimitry Andric MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
12295f757f3fSDimitry Andric MIB->getDesc(), MIB->getOperand(Idx), Idx));
12305f757f3fSDimitry Andric }
12315f757f3fSDimitry Andric }
12325f757f3fSDimitry Andric
12335f757f3fSDimitry Andric // Tell the call which registers are clobbered.
1234fe6060f1SDimitry Andric const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1235fe6060f1SDimitry Andric MIB.addRegMask(Mask);
1236fe6060f1SDimitry Andric
1237fe6060f1SDimitry Andric // FPDiff is the byte offset of the call's argument area from the callee's.
1238fe6060f1SDimitry Andric // Stores to callee stack arguments will be placed in FixedStackSlots offset
1239fe6060f1SDimitry Andric // by this amount for a tail call. In a sibling call it must be 0 because the
1240fe6060f1SDimitry Andric // caller will deallocate the entire stack and the callee still expects its
1241fe6060f1SDimitry Andric // arguments to begin at SP+0.
1242fe6060f1SDimitry Andric int FPDiff = 0;
1243fe6060f1SDimitry Andric
1244fe6060f1SDimitry Andric // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1245fe6060f1SDimitry Andric // by -tailcallopt. For sibcalls, the memory operands for the call are
1246fe6060f1SDimitry Andric // already available in the caller's incoming argument space.
1247fe6060f1SDimitry Andric unsigned NumBytes = 0;
1248fe6060f1SDimitry Andric if (!IsSibCall) {
1249fe6060f1SDimitry Andric // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1250fe6060f1SDimitry Andric // before handling assignments, because FPDiff must be known for memory
1251fe6060f1SDimitry Andric // arguments.
1252fe6060f1SDimitry Andric unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1253fe6060f1SDimitry Andric SmallVector<CCValAssign, 16> OutLocs;
1254fe6060f1SDimitry Andric CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1255fe6060f1SDimitry Andric
1256fe6060f1SDimitry Andric // FIXME: Not accounting for callee implicit inputs
1257fe6060f1SDimitry Andric OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg);
1258fe6060f1SDimitry Andric if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1259fe6060f1SDimitry Andric return false;
1260fe6060f1SDimitry Andric
1261fe6060f1SDimitry Andric // The callee will pop the argument stack as a tail call. Thus, we must
1262fe6060f1SDimitry Andric // keep it 16-byte aligned.
126306c3fb27SDimitry Andric NumBytes = alignTo(OutInfo.getStackSize(), ST.getStackAlignment());
1264fe6060f1SDimitry Andric
1265fe6060f1SDimitry Andric // FPDiff will be negative if this tail call requires more space than we
1266fe6060f1SDimitry Andric // would automatically have in our incoming argument space. Positive if we
1267fe6060f1SDimitry Andric // actually shrink the stack.
1268fe6060f1SDimitry Andric FPDiff = NumReusableBytes - NumBytes;
1269fe6060f1SDimitry Andric
1270fe6060f1SDimitry Andric // The stack pointer must be 16-byte aligned at all times it's used for a
1271fe6060f1SDimitry Andric // memory operation, which in practice means at *all* times and in
1272fe6060f1SDimitry Andric // particular across call boundaries. Therefore our own arguments started at
1273fe6060f1SDimitry Andric // a 16-byte aligned SP and the delta applied for the tail call should
1274fe6060f1SDimitry Andric // satisfy the same constraint.
1275fe6060f1SDimitry Andric assert(isAligned(ST.getStackAlignment(), FPDiff) &&
1276fe6060f1SDimitry Andric "unaligned stack on tail call");
1277fe6060f1SDimitry Andric }
1278fe6060f1SDimitry Andric
1279fe6060f1SDimitry Andric SmallVector<CCValAssign, 16> ArgLocs;
1280fe6060f1SDimitry Andric CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1281fe6060f1SDimitry Andric
1282fe6060f1SDimitry Andric // We could pass MIB and directly add the implicit uses to the call
1283fe6060f1SDimitry Andric // now. However, as an aesthetic choice, place implicit argument operands
1284fe6060f1SDimitry Andric // after the ordinary user argument registers.
1285fe6060f1SDimitry Andric SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1286fe6060f1SDimitry Andric
12875f757f3fSDimitry Andric if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
12885f757f3fSDimitry Andric !AMDGPU::isChainCC(Info.CallConv)) {
1289fe6060f1SDimitry Andric // With a fixed ABI, allocate fixed registers before user arguments.
1290fe6060f1SDimitry Andric if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1291fe6060f1SDimitry Andric return false;
1292fe6060f1SDimitry Andric }
1293fe6060f1SDimitry Andric
1294fe6060f1SDimitry Andric OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1295fe6060f1SDimitry Andric
1296fe6060f1SDimitry Andric if (!determineAssignments(Assigner, OutArgs, CCInfo))
1297fe6060f1SDimitry Andric return false;
1298fe6060f1SDimitry Andric
1299fe6060f1SDimitry Andric // Do the actual argument marshalling.
1300fe6060f1SDimitry Andric AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
1301fe6060f1SDimitry Andric if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1302fe6060f1SDimitry Andric return false;
1303fe6060f1SDimitry Andric
1304*0fca6ea1SDimitry Andric if (Info.ConvergenceCtrlToken) {
1305*0fca6ea1SDimitry Andric MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
1306*0fca6ea1SDimitry Andric }
13075f757f3fSDimitry Andric handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC,
13085f757f3fSDimitry Andric ImplicitArgRegs);
1309fe6060f1SDimitry Andric
1310fe6060f1SDimitry Andric // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1311fe6060f1SDimitry Andric // sequence start and end here.
1312fe6060f1SDimitry Andric if (!IsSibCall) {
1313fe6060f1SDimitry Andric MIB->getOperand(1).setImm(FPDiff);
1314fe6060f1SDimitry Andric CallSeqStart.addImm(NumBytes).addImm(0);
1315fe6060f1SDimitry Andric // End the call sequence *before* emitting the call. Normally, we would
1316fe6060f1SDimitry Andric // tidy the frame up after the call. However, here, we've laid out the
1317fe6060f1SDimitry Andric // parameters so that when SP is reset, they will be in the correct
1318fe6060f1SDimitry Andric // location.
1319fe6060f1SDimitry Andric MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
1320fe6060f1SDimitry Andric }
1321fe6060f1SDimitry Andric
1322fe6060f1SDimitry Andric // Now we can add the actual call instruction to the correct basic block.
1323fe6060f1SDimitry Andric MIRBuilder.insertInstr(MIB);
1324fe6060f1SDimitry Andric
1325fe6060f1SDimitry Andric // If Callee is a reg, since it is used by a target specific
1326fe6060f1SDimitry Andric // instruction, it must have a register class matching the
1327fe6060f1SDimitry Andric // constraint of that instruction.
1328fe6060f1SDimitry Andric
1329fe6060f1SDimitry Andric // FIXME: We should define regbankselectable call instructions to handle
1330fe6060f1SDimitry Andric // divergent call targets.
1331fe6060f1SDimitry Andric if (MIB->getOperand(0).isReg()) {
1332fe6060f1SDimitry Andric MIB->getOperand(0).setReg(constrainOperandRegClass(
1333fe6060f1SDimitry Andric MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1334fe6060f1SDimitry Andric MIB->getDesc(), MIB->getOperand(0), 0));
1335fe6060f1SDimitry Andric }
1336fe6060f1SDimitry Andric
1337fe6060f1SDimitry Andric MF.getFrameInfo().setHasTailCall();
1338fe6060f1SDimitry Andric Info.LoweredTailCall = true;
1339fe6060f1SDimitry Andric return true;
1340fe6060f1SDimitry Andric }
1341fe6060f1SDimitry Andric
13425f757f3fSDimitry Andric /// Lower a call to the @llvm.amdgcn.cs.chain intrinsic.
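/// The intrinsic's operands are expected in the order (callee, exec mask,
/// SGPR args, VGPR args, flags), which is how OrigArgs is indexed below.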
13435f757f3fSDimitry Andric bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
13445f757f3fSDimitry Andric CallLoweringInfo &Info) const {
13455f757f3fSDimitry Andric ArgInfo Callee = Info.OrigArgs[0];
13465f757f3fSDimitry Andric ArgInfo SGPRArgs = Info.OrigArgs[2];
13475f757f3fSDimitry Andric ArgInfo VGPRArgs = Info.OrigArgs[3];
13485f757f3fSDimitry Andric ArgInfo Flags = Info.OrigArgs[4];
13495f757f3fSDimitry Andric
13505f757f3fSDimitry Andric assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&
13515f757f3fSDimitry Andric "Non-zero flags aren't supported yet.");
13525f757f3fSDimitry Andric assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet.");
13535f757f3fSDimitry Andric
13545f757f3fSDimitry Andric MachineFunction &MF = MIRBuilder.getMF();
13555f757f3fSDimitry Andric const Function &F = MF.getFunction();
1356*0fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout();
13575f757f3fSDimitry Andric
13585f757f3fSDimitry Andric // The function to jump to is actually the first argument, so we'll change the
13595f757f3fSDimitry Andric // Callee and other info to match that before using our existing helper.
13605f757f3fSDimitry Andric const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
13615f757f3fSDimitry Andric if (const Function *F = dyn_cast<Function>(CalleeV)) {
13625f757f3fSDimitry Andric Info.Callee = MachineOperand::CreateGA(F, 0);
13635f757f3fSDimitry Andric Info.CallConv = F->getCallingConv();
13645f757f3fSDimitry Andric } else {
13655f757f3fSDimitry Andric assert(Callee.Regs.size() == 1 && "Too many regs for the callee");
13665f757f3fSDimitry Andric Info.Callee = MachineOperand::CreateReg(Callee.Regs[0], false);
13675f757f3fSDimitry Andric Info.CallConv = CallingConv::AMDGPU_CS_Chain; // amdgpu_cs_chain_preserve
13685f757f3fSDimitry Andric // behaves the same here.
13695f757f3fSDimitry Andric }
13705f757f3fSDimitry Andric
13715f757f3fSDimitry Andric // The function that we're calling cannot be vararg (only the intrinsic is).
13725f757f3fSDimitry Andric Info.IsVarArg = false;
13735f757f3fSDimitry Andric
13745f757f3fSDimitry Andric assert(std::all_of(SGPRArgs.Flags.begin(), SGPRArgs.Flags.end(),
13755f757f3fSDimitry Andric [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
13765f757f3fSDimitry Andric "SGPR arguments should be marked inreg");
13775f757f3fSDimitry Andric assert(std::none_of(VGPRArgs.Flags.begin(), VGPRArgs.Flags.end(),
13785f757f3fSDimitry Andric [](ISD::ArgFlagsTy F) { return F.isInReg(); }) &&
13795f757f3fSDimitry Andric "VGPR arguments should not be marked inreg");
13805f757f3fSDimitry Andric
13815f757f3fSDimitry Andric SmallVector<ArgInfo, 8> OutArgs;
13825f757f3fSDimitry Andric splitToValueTypes(SGPRArgs, OutArgs, DL, Info.CallConv);
13835f757f3fSDimitry Andric splitToValueTypes(VGPRArgs, OutArgs, DL, Info.CallConv);
13845f757f3fSDimitry Andric
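// Chain calls do not return to the caller, so they are always lowered as
// must-tail calls.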
13855f757f3fSDimitry Andric Info.IsMustTailCall = true;
13865f757f3fSDimitry Andric return lowerTailCall(MIRBuilder, Info, OutArgs);
13875f757f3fSDimitry Andric }
13885f757f3fSDimitry Andric
1389e8d8bef9SDimitry Andric bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1390e8d8bef9SDimitry Andric CallLoweringInfo &Info) const {
13915f757f3fSDimitry Andric if (Function *F = Info.CB->getCalledFunction())
13925f757f3fSDimitry Andric if (F->isIntrinsic()) {
13935f757f3fSDimitry Andric assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
13945f757f3fSDimitry Andric "Unexpected intrinsic");
13955f757f3fSDimitry Andric return lowerChainCall(MIRBuilder, Info);
13965f757f3fSDimitry Andric }
13975f757f3fSDimitry Andric
1398e8d8bef9SDimitry Andric if (Info.IsVarArg) {
1399e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
1400e8d8bef9SDimitry Andric return false;
1401e8d8bef9SDimitry Andric }
1402e8d8bef9SDimitry Andric
1403e8d8bef9SDimitry Andric MachineFunction &MF = MIRBuilder.getMF();
1404e8d8bef9SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1405e8d8bef9SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
1406e8d8bef9SDimitry Andric
1407e8d8bef9SDimitry Andric const Function &F = MF.getFunction();
1408e8d8bef9SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
1409e8d8bef9SDimitry Andric const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1410*0fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout();
1411e8d8bef9SDimitry Andric
1412e8d8bef9SDimitry Andric SmallVector<ArgInfo, 8> OutArgs;
1413fe6060f1SDimitry Andric for (auto &OrigArg : Info.OrigArgs)
1414fe6060f1SDimitry Andric splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1415e8d8bef9SDimitry Andric
1416fe6060f1SDimitry Andric SmallVector<ArgInfo, 8> InArgs;
1417fe6060f1SDimitry Andric if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
1418fe6060f1SDimitry Andric splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1419e8d8bef9SDimitry Andric
1420e8d8bef9SDimitry Andric // If we can lower as a tail call, do that instead.
1421fe6060f1SDimitry Andric bool CanTailCallOpt =
1422fe6060f1SDimitry Andric isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1423e8d8bef9SDimitry Andric
1424e8d8bef9SDimitry Andric // We must emit a tail call if we have musttail.
1425e8d8bef9SDimitry Andric if (Info.IsMustTailCall && !CanTailCallOpt) {
1426e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1427e8d8bef9SDimitry Andric return false;
1428e8d8bef9SDimitry Andric }
1429e8d8bef9SDimitry Andric
143004eeddc0SDimitry Andric Info.IsTailCall = CanTailCallOpt;
1431fe6060f1SDimitry Andric if (CanTailCallOpt)
1432fe6060f1SDimitry Andric return lowerTailCall(MIRBuilder, Info, OutArgs);
1433fe6060f1SDimitry Andric
1434e8d8bef9SDimitry Andric // Find out which ABI gets to decide where things go.
1435e8d8bef9SDimitry Andric CCAssignFn *AssignFnFixed;
1436e8d8bef9SDimitry Andric CCAssignFn *AssignFnVarArg;
1437e8d8bef9SDimitry Andric std::tie(AssignFnFixed, AssignFnVarArg) =
1438e8d8bef9SDimitry Andric getAssignFnsForCC(Info.CallConv, TLI);
1439e8d8bef9SDimitry Andric
1440e8d8bef9SDimitry Andric MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
1441e8d8bef9SDimitry Andric .addImm(0)
1442e8d8bef9SDimitry Andric .addImm(0);
1443e8d8bef9SDimitry Andric
1444e8d8bef9SDimitry Andric // Create a temporarily-floating call instruction so we can add the implicit
1445e8d8bef9SDimitry Andric // uses of arg registers.
14465f757f3fSDimitry Andric unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),
14475f757f3fSDimitry Andric Info.CallConv);
1448e8d8bef9SDimitry Andric
1449e8d8bef9SDimitry Andric auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1450e8d8bef9SDimitry Andric MIB.addDef(TRI->getReturnAddressReg(MF));
1451e8d8bef9SDimitry Andric
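// Calls are treated as convergent by default; the NoConvergent flag records
// call sites known not to be convergent.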
14525f757f3fSDimitry Andric if (!Info.IsConvergent)
14535f757f3fSDimitry Andric MIB.setMIFlag(MachineInstr::NoConvergent);
14545f757f3fSDimitry Andric
1455e8d8bef9SDimitry Andric if (!addCallTargetOperands(MIB, MIRBuilder, Info))
1456e8d8bef9SDimitry Andric return false;
1457e8d8bef9SDimitry Andric
1458e8d8bef9SDimitry Andric // Tell the call which registers are clobbered.
1459e8d8bef9SDimitry Andric const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
1460e8d8bef9SDimitry Andric MIB.addRegMask(Mask);
1461e8d8bef9SDimitry Andric
1462e8d8bef9SDimitry Andric SmallVector<CCValAssign, 16> ArgLocs;
1463e8d8bef9SDimitry Andric CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1464e8d8bef9SDimitry Andric
1465e8d8bef9SDimitry Andric // We could pass MIB and directly add the implicit uses to the call
1466e8d8bef9SDimitry Andric // now. However, as an aesthetic choice, place implicit argument operands
1467e8d8bef9SDimitry Andric // after the ordinary user argument registers.
1468e8d8bef9SDimitry Andric SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1469e8d8bef9SDimitry Andric
14700eae32dcSDimitry Andric if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
1471e8d8bef9SDimitry Andric // With a fixed ABI, allocate fixed registers before user arguments.
1472e8d8bef9SDimitry Andric if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1473e8d8bef9SDimitry Andric return false;
1474e8d8bef9SDimitry Andric }
1475e8d8bef9SDimitry Andric
1476e8d8bef9SDimitry Andric // Do the actual argument marshalling.
1477e8d8bef9SDimitry Andric SmallVector<Register, 8> PhysRegs;
1478fe6060f1SDimitry Andric
1479fe6060f1SDimitry Andric OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1480fe6060f1SDimitry Andric if (!determineAssignments(Assigner, OutArgs, CCInfo))
1481fe6060f1SDimitry Andric return false;
1482fe6060f1SDimitry Andric
1483fe6060f1SDimitry Andric AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
1484fe6060f1SDimitry Andric if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1485e8d8bef9SDimitry Andric return false;
1486e8d8bef9SDimitry Andric
1487e8d8bef9SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1488e8d8bef9SDimitry Andric
1489*0fca6ea1SDimitry Andric if (Info.ConvergenceCtrlToken) {
1490*0fca6ea1SDimitry Andric MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
1491*0fca6ea1SDimitry Andric }
14925f757f3fSDimitry Andric handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv,
14935f757f3fSDimitry Andric ImplicitArgRegs);
1494e8d8bef9SDimitry Andric
1495e8d8bef9SDimitry Andric // Get a count of how many bytes are to be pushed on the stack.
149606c3fb27SDimitry Andric unsigned NumBytes = CCInfo.getStackSize();
1497e8d8bef9SDimitry Andric
1498e8d8bef9SDimitry Andric // If Callee is a reg, since it is used by a target specific
1499e8d8bef9SDimitry Andric // instruction, it must have a register class matching the
1500e8d8bef9SDimitry Andric // constraint of that instruction.
1501e8d8bef9SDimitry Andric
1502e8d8bef9SDimitry Andric // FIXME: We should define regbankselectable call instructions to handle
1503e8d8bef9SDimitry Andric // divergent call targets.
1504e8d8bef9SDimitry Andric if (MIB->getOperand(1).isReg()) {
1505e8d8bef9SDimitry Andric MIB->getOperand(1).setReg(constrainOperandRegClass(
1506e8d8bef9SDimitry Andric MF, *TRI, MRI, *ST.getInstrInfo(),
1507e8d8bef9SDimitry Andric *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1508e8d8bef9SDimitry Andric 1));
1509e8d8bef9SDimitry Andric }
1510e8d8bef9SDimitry Andric
1511e8d8bef9SDimitry Andric // Now we can add the actual call instruction to the correct position.
1512e8d8bef9SDimitry Andric MIRBuilder.insertInstr(MIB);
1513e8d8bef9SDimitry Andric
1514e8d8bef9SDimitry Andric // Finally we can copy the returned value back into its virtual-register. In
1515e8d8bef9SDimitry Andric // symmetry with the arguments, the physical register must be an
1516e8d8bef9SDimitry Andric // implicit-define of the call instruction.
1517e8d8bef9SDimitry Andric if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1518e8d8bef9SDimitry Andric CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
1519e8d8bef9SDimitry Andric Info.IsVarArg);
1520fe6060f1SDimitry Andric IncomingValueAssigner Assigner(RetAssignFn);
1521fe6060f1SDimitry Andric CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1522fe6060f1SDimitry Andric if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
1523fe6060f1SDimitry Andric Info.CallConv, Info.IsVarArg))
1524e8d8bef9SDimitry Andric return false;
1525e8d8bef9SDimitry Andric }
1526e8d8bef9SDimitry Andric
1527e8d8bef9SDimitry Andric uint64_t CalleePopBytes = NumBytes;
1528fe6060f1SDimitry Andric
1529fe6060f1SDimitry Andric MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1530fe6060f1SDimitry Andric .addImm(0)
1531e8d8bef9SDimitry Andric .addImm(CalleePopBytes);
1532e8d8bef9SDimitry Andric
1533fe6060f1SDimitry Andric if (!Info.CanLowerReturn) {
1534fe6060f1SDimitry Andric insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1535fe6060f1SDimitry Andric Info.DemoteRegister, Info.DemoteStackIndex);
1536fe6060f1SDimitry Andric }
1537fe6060f1SDimitry Andric
1538e8d8bef9SDimitry Andric return true;
1539e8d8bef9SDimitry Andric }
1540