//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

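// Handler for outgoing values. Only register returns are supported so far:
// each split value is copied into the physical register chosen by the
// calling convention, and that register is recorded as a use of the return
// instruction being built. Stack-based assignment is deliberately left as
// llvm_unreachable stubs.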
struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  if (!Val) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  Register VReg = VRegs[0];

  const Function &F = MF.getFunction();
  auto &DL = F.getParent()->getDataLayout();
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ArgInfo OrigArg{VReg, Val->getType()};
  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  SmallVector<ArgInfo, 8> SplitArgs;
  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(),
                                             /*IsVarArg=*/false);
  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
  }
  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;
  MIRBuilder.insertInstr(RetInstr);

  return true;
}
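
// Example of the MIR produced above for an amdgpu_ps shader returning one
// s32 value, assuming the calling convention assigns it to $vgpr0 (the
// register choice is up to the CCAssignFn; names are illustrative):
//
//   $vgpr0 = COPY %ret(s32)
//   SI_RETURN_TO_EPILOG $vgpr0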

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}
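
// Illustrative MIR for the address computation above, for a parameter at
// byte offset 36 (virtual register names are made up for readability):
//
//   %off:_(s64) = G_CONSTANT i64 36
//   %ptr:_(p4) = G_GEP %kernarg_segment_ptr(p4), %off(s64)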

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
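
// Roughly, for an i32 kernel argument this emits a load through the pointer
// produced by lowerParameterPtr, with a memory operand marking it invariant
// and non-temporal (printed form approximate):
//
//   %arg:_(s32) = G_LOAD %ptr(p4) :: (non-temporal invariant load 4, addrspace 4)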

static Register findFirstFreeSGPR(CCState &CCInfo) {
  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
      return AMDGPU::SGPR0 + Reg;
    }
  }
  llvm_unreachable("Cannot allocate sgpr");
}

static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
                                           MachineFunction &MF,
                                           const SIRegisterInfo &TRI,
                                           SIMachineFunctionInfo &Info) {
  const LLT S32 = LLT::scalar(32);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.hasWorkItemIDX()) {
    Register Reg = AMDGPU::VGPR0;
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);

    CCInfo.AllocateReg(Reg);
    Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
  }

  if (Info.hasWorkItemIDY()) {
    Register Reg = AMDGPU::VGPR1;
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);

    CCInfo.AllocateReg(Reg);
    Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
  }

  if (Info.hasWorkItemIDZ()) {
    Register Reg = AMDGPU::VGPR2;
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);

    CCInfo.AllocateReg(Reg);
    Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
  }
}
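
// Note that kernel entry points always receive the enabled workitem IDs in
// the fixed registers VGPR0, VGPR1 and VGPR2, as allocated above.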

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we
  // read these from the dispatch pointer.
}

static void allocateSystemSGPRs(CCState &CCInfo,
                                MachineFunction &MF,
                                SIMachineFunctionInfo &Info,
                                CallingConv::ID CallConv,
                                bool IsShader) {
  const LLT S32 = LLT::scalar(32);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.hasWorkGroupIDX()) {
    Register Reg = Info.addWorkGroupIDX();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasWorkGroupIDY()) {
    Register Reg = Info.addWorkGroupIDY();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasWorkGroupIDZ()) {
    Register Reg = Info.addWorkGroupIDZ();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasWorkGroupInfo()) {
    Register Reg = Info.addWorkGroupInfo();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasPrivateSegmentWaveByteOffset()) {
    // Scratch wave offset passed in system SGPR.
    Register PrivateSegmentWaveByteOffsetReg;

    if (IsShader) {
      PrivateSegmentWaveByteOffsetReg =
        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();

      // This is true if the scratch wave byte offset doesn't have a fixed
      // location.
      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
      }
    } else
      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();

    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
  }
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, ABIAlign);
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}
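
// Worked example for the kernel argument offsets computed above
// (illustrative): with arguments (i32, i64) and a BaseOffset of 0 (as under
// the amdhsa ABI), the i32 is loaded at offset 0 and the i64 at offset 8,
// because ExplicitArgOffset is first rounded up to the i64's 8-byte ABI
// alignment before the load is emitted.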

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  bool IsShader = AMDGPU::isShader(F.getCallingConv());

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    OrigArg.Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

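    // Pixel shader inputs that are never used and were not explicitly
    // requested may be skipped entirely; only the inputs marked enabled here
    // end up loaded by the hardware (they feed the PS input enable mask).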
    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);
      // Vector arguments are not really handled yet: AssignFn is only queried
      // with the element type, so bail out to the SelectionDAG path whenever
      // it would have assigned a location.
      if (!Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

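  // For the graphics calling conventions handled so far (VS and PS), every
  // non-skipped argument was assigned a location above; emit a live-in copy
  // from each assigned physical register into its virtual register.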
  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      assert(VRegs[OrigArgIdx].size() == 1 &&
             "Can't lower into more than 1 reg");
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
    }

    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
    return true;
  }

  return false;
}
465*0b57cec5SDimitry Andric }
466