//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
    : CallLowering(&TLI) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  if (!Val) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  Register VReg = VRegs[0];

  const Function &F = MF.getFunction();
  auto &DL = F.getParent()->getDataLayout();
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ArgInfo OrigArg{VReg, Val->getType()};
  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  SmallVector<ArgInfo, 8> SplitArgs;
  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
  }
  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;
  MIRBuilder.insertInstr(RetInstr);

  return true;
}

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
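  // Kernel arguments live in the constant address space; the address of a
  // parameter is the preloaded kernarg segment base plus a byte offset.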
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

static Register findFirstFreeSGPR(CCState &CCInfo) {
  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
      return AMDGPU::SGPR0 + Reg;
    }
  }
  llvm_unreachable("Cannot allocate sgpr");
}

static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
                                           MachineFunction &MF,
                                           const SIRegisterInfo &TRI,
                                           SIMachineFunctionInfo &Info) {
  const LLT S32 = LLT::scalar(32);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.hasWorkItemIDX()) {
    Register Reg = AMDGPU::VGPR0;
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
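    // Reserve the VGPR in the calling-convention state so it is not handed
    // out again for another argument.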
    CCInfo.AllocateReg(Reg);
    Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
  }

  if (Info.hasWorkItemIDY()) {
    Register Reg = AMDGPU::VGPR1;
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);

    CCInfo.AllocateReg(Reg);
    Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
  }

  if (Info.hasWorkItemIDZ()) {
    Register Reg = AMDGPU::VGPR2;
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);

    CCInfo.AllocateReg(Reg);
    Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
  }
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we
  // read these from the dispatch pointer.
}

static void allocateSystemSGPRs(CCState &CCInfo,
                                MachineFunction &MF,
                                SIMachineFunctionInfo &Info,
                                CallingConv::ID CallConv,
                                bool IsShader) {
  const LLT S32 = LLT::scalar(32);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.hasWorkGroupIDX()) {
    Register Reg = Info.addWorkGroupIDX();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasWorkGroupIDY()) {
    Register Reg = Info.addWorkGroupIDY();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasWorkGroupIDZ()) {
    unsigned Reg = Info.addWorkGroupIDZ();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasWorkGroupInfo()) {
    unsigned Reg = Info.addWorkGroupInfo();
    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
    CCInfo.AllocateReg(Reg);
  }

  if (Info.hasPrivateSegmentWaveByteOffset()) {
    // Scratch wave offset passed in system SGPR.
    unsigned PrivateSegmentWaveByteOffsetReg;

    if (IsShader) {
      PrivateSegmentWaveByteOffsetReg =
          Info.getPrivateSegmentWaveByteOffsetSystemSGPR();

      // This is true if the scratch wave byte offset doesn't have a fixed
      // location.
      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
      }
    } else
      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();

    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
  }
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
        OrigArgRegs.size() == 1
            ? OrigArgRegs[0]
            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  bool IsShader = AMDGPU::isShader(F.getCallingConv());

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

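    // Note: a CCAssignFn returns true when it fails to assign a location
    // for the value.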
    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);
      if (!Res)
        return false;
    } else {
      MVT ValVT = ValEVT.getSimpleVT();
      if (!ValEVT.isSimple())
        return false;
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      assert(VRegs[OrigArgIdx].size() == 1 &&
             "Can't lower into more than 1 reg");
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
    }

    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
    return true;
  }

  return false;
}